#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/


#include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

    MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
    matrix that represents the operator; for a sequential matrix it returns the matrix itself.

    Input Parameter:
.   A - the Seq or MPI dense matrix

    Output Parameter:
.   B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart =
mat->rmap.rstart,rend = mat->rmap.rend,row; 112 PetscTruth roworiented = A->roworiented; 113 114 PetscFunctionBegin; 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 if (roworiented) { 132 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 133 } else { 134 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 135 } 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetValues_MPIDense" 144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 145 { 146 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 149 150 PetscFunctionBegin; 151 for (i=0; i<m; i++) { 152 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 153 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 154 if (idxm[i] >= rstart && idxm[i] < rend) { 155 row = idxm[i] - rstart; 156 for (j=0; j<n; j++) { 157 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 158 if (idxn[j] >= mat->cmap.N) { 159 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 } 161 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 162 } 163 } else { 164 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 165 } 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetArray_MPIDense" 172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 185 { 186 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 187 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 188 PetscErrorCode ierr; 189 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 190 PetscScalar *av,*bv,*v = lmat->v; 191 Mat newmat; 192 193 PetscFunctionBegin; 194 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 195 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 196 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 198 199 /* No parallel redistribution currently supported! Should really check each index set 200 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 201 original matrix! 
*/ 202 203 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 204 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 205 206 /* Check submatrix call */ 207 if (scall == MAT_REUSE_MATRIX) { 208 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 209 /* Really need to test rows and column sizes! */ 210 newmat = *B; 211 } else { 212 /* Create and fill new matrix */ 213 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 214 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 215 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 216 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 217 } 218 219 /* Now extract the data pointers and do the copy, column at a time */ 220 newmatd = (Mat_MPIDense*)newmat->data; 221 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 222 223 for (i=0; i<ncols; i++) { 224 av = v + nlrows*icol[i]; 225 for (j=0; j<nrows; j++) { 226 *bv++ = av[irow[j] - rstart]; 227 } 228 } 229 230 /* Assemble the matrices so that the correct flags are set */ 231 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 234 /* Free work space */ 235 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 236 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 237 *B = newmat; 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatRestoreArray_MPIDense" 243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 244 { 245 PetscFunctionBegin; 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 252 { 253 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 254 MPI_Comm comm = ((PetscObject)mat)->comm; 255 PetscErrorCode ierr; 256 PetscInt nstash,reallocs; 257 InsertMode addv; 258 259 PetscFunctionBegin; 260 /* make sure all processors are either in INSERTMODE or ADDMODE */ 261 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 262 if (addv == (ADD_VALUES|INSERT_VALUES)) { 263 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 264 } 265 mat->insertmode = addv; /* in case this processor had no cache */ 266 267 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 268 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 269 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 276 { 277 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,*row,*col,flg,j,rstart,ncols; 280 PetscMPIInt n; 281 PetscScalar *val; 282 InsertMode addv=mat->insertmode; 283 284 PetscFunctionBegin; 285 /* wait on receives */ 286 while (1) { 287 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 288 if (!flg) break; 289 290 for (i=0; i<n;) { 291 /* Now identify the consecutive vals belonging to the same row */ 292 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 293 if (j < n) ncols = j-i; 294 else ncols = n-i; 295 /* Now assemble all these values with a single function call */ 296 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 297 i = j; 298 } 
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not go through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr =
MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscFree(starts);CHKERRQ(ierr); 395 396 base = owners[rank]; 397 398 /* wait on receives */ 399 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 400 source = lens + nrecvs; 401 count = nrecvs; slen = 0; 402 while (count) { 403 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 404 /* unpack receives into our local space */ 405 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 406 source[imdex] = recv_status.MPI_SOURCE; 407 lens[imdex] = n; 408 slen += n; 409 count--; 410 } 411 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 412 413 /* move the data into the send scatter */ 414 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 415 count = 0; 416 for (i=0; i<nrecvs; i++) { 417 values = rvalues + i*nmax; 418 for (j=0; j<lens[i]; j++) { 419 lrows[count++] = values[j] - base; 420 } 421 } 422 ierr = PetscFree(rvalues);CHKERRQ(ierr); 423 ierr = PetscFree(lens);CHKERRQ(ierr); 424 ierr = PetscFree(owner);CHKERRQ(ierr); 425 ierr = PetscFree(nprocs);CHKERRQ(ierr); 426 427 /* actually zap the local rows */ 428 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 429 ierr = PetscFree(lrows);CHKERRQ(ierr); 430 431 /* wait on sends */ 432 if (nsends) { 433 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 434 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 435 ierr = PetscFree(send_status);CHKERRQ(ierr); 436 } 437 ierr = PetscFree(send_waits);CHKERRQ(ierr); 438 ierr = PetscFree(svalues);CHKERRQ(ierr); 439 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatMult_MPIDense" 445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 446 { 447 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 452 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatMultAdd_MPIDense" 459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 462 PetscErrorCode ierr; 463 464 PetscFunctionBegin; 465 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatMultTranspose_MPIDense" 473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 474 { 475 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 476 PetscErrorCode ierr; 477 PetscScalar zero = 0.0; 478 479 PetscFunctionBegin; 480 ierr = VecSet(yy,zero);CHKERRQ(ierr); 481 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 482 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 483 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 490 { 491 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 492 
PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 496 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 497 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 498 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatGetDiagonal_MPIDense" 504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 507 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 508 PetscErrorCode ierr; 509 PetscInt len,i,n,m = A->rmap.n,radd; 510 PetscScalar *x,zero = 0.0; 511 512 PetscFunctionBegin; 513 ierr = VecSet(v,zero);CHKERRQ(ierr); 514 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 515 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 516 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 517 len = PetscMin(a->A->rmap.n,a->A->cmap.n); 518 radd = A->rmap.rstart*m; 519 for (i=0; i<len; i++) { 520 x[i] = aloc->v[radd + i*m + i]; 521 } 522 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatDestroy_MPIDense" 528 PetscErrorCode MatDestroy_MPIDense(Mat mat) 529 { 530 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 531 PetscErrorCode ierr; 532 #if defined(PETSC_HAVE_PLAPACK) 533 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 534 #endif 535 536 PetscFunctionBegin; 537 538 #if defined(PETSC_USE_LOG) 539 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 540 #endif 541 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 542 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 543 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 544 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 545 #if defined(PETSC_HAVE_PLAPACK) 546 if (lu->CleanUpPlapack) { 547 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 548 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 549 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 550 551 552 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 553 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 554 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 555 } 556 #endif 557 558 ierr = PetscFree(mdn);CHKERRQ(ierr); 559 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 560 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatView_MPIDense_Binary" 570 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 571 { 572 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 573 PetscErrorCode ierr; 574 PetscViewerFormat format; 575 int fd; 576 PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k; 577 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 578 PetscScalar *work,*v,*vv; 579 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 580 MPI_Status status; 581 582 PetscFunctionBegin; 583 if (mdn->size == 
1) { 584 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 585 } else { 586 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 587 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 588 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 589 590 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 591 if (format == PETSC_VIEWER_BINARY_NATIVE) { 592 593 if (!rank) { 594 /* store the matrix as a dense matrix */ 595 header[0] = MAT_FILE_COOKIE; 596 header[1] = mat->rmap.N; 597 header[2] = N; 598 header[3] = MATRIX_BINARY_FORMAT_DENSE; 599 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 600 601 /* get largest work array needed for transposing array */ 602 mmax = mat->rmap.n; 603 for (i=1; i<size; i++) { 604 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 605 } 606 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 607 608 /* write out local array, by rows */ 609 m = mat->rmap.n; 610 v = a->v; 611 for (j=0; j<N; j++) { 612 for (i=0; i<m; i++) { 613 work[j + i*N] = *v++; 614 } 615 } 616 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 617 /* get largest work array to receive messages from other processes, excludes process zero */ 618 mmax = 0; 619 for (i=1; i<size; i++) { 620 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 621 } 622 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 623 v = vv; 624 for (k=1; k<size; k++) { 625 m = mat->rmap.range[k+1] - mat->rmap.range[k]; 626 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 627 628 for (j=0; j<N; j++) { 629 for (i=0; i<m; i++) { 630 work[j + i*N] = *v++; 631 } 632 } 633 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 634 } 635 ierr = PetscFree(work);CHKERRQ(ierr); 636 ierr = PetscFree(vv);CHKERRQ(ierr); 637 } else { 638 ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 639 } 640 } 641 } 642 PetscFunctionReturn(0); 643 } 644 645 #undef __FUNCT__ 646 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 647 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 648 { 649 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 650 PetscErrorCode ierr; 651 PetscMPIInt size = mdn->size,rank = mdn->rank; 652 PetscViewerType vtype; 653 PetscTruth iascii,isdraw; 654 PetscViewer sviewer; 655 PetscViewerFormat format; 656 #if defined(PETSC_HAVE_PLAPACK) 657 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 658 #endif 659 660 PetscFunctionBegin; 661 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 662 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 663 if (iascii) { 664 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 665 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 666 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 667 MatInfo info; 668 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 669 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n, 670 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 671 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 672 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 673 #if defined(PETSC_HAVE_PLAPACK) 674 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 675 ierr = 
PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows,Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode
MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 764 { 765 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 766 Mat mdn = mat->A; 767 PetscErrorCode ierr; 768 PetscReal isend[5],irecv[5]; 769 770 PetscFunctionBegin; 771 info->rows_global = (double)A->rmap.N; 772 info->columns_global = (double)A->cmap.N; 773 info->rows_local = (double)A->rmap.n; 774 info->columns_local = (double)A->cmap.N; 775 info->block_size = 1.0; 776 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 777 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 778 isend[3] = info->memory; isend[4] = info->mallocs; 779 if (flag == MAT_LOCAL) { 780 info->nz_used = isend[0]; 781 info->nz_allocated = isend[1]; 782 info->nz_unneeded = isend[2]; 783 info->memory = isend[3]; 784 info->mallocs = isend[4]; 785 } else if (flag == MAT_GLOBAL_MAX) { 786 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 787 info->nz_used = irecv[0]; 788 info->nz_allocated = irecv[1]; 789 info->nz_unneeded = irecv[2]; 790 info->memory = irecv[3]; 791 info->mallocs = irecv[4]; 792 } else if (flag == MAT_GLOBAL_SUM) { 793 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 794 info->nz_used = irecv[0]; 795 info->nz_allocated = irecv[1]; 796 info->nz_unneeded = irecv[2]; 797 info->memory = irecv[3]; 798 info->mallocs = irecv[4]; 799 } 800 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 801 info->fill_ratio_needed = 0; 802 info->factor_mallocs = 0; 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatSetOption_MPIDense" 808 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 809 { 810 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 switch (op) { 815 case MAT_NEW_NONZERO_LOCATIONS: 816 case MAT_NEW_NONZERO_LOCATION_ERR: 817 case MAT_NEW_NONZERO_ALLOCATION_ERR: 818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 819 break; 820 case MAT_ROW_ORIENTED: 821 a->roworiented = flg; 822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 823 break; 824 case MAT_NEW_DIAGONALS: 825 case MAT_USE_HASH_TABLE: 826 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 827 break; 828 case MAT_IGNORE_OFF_PROC_ENTRIES: 829 a->donotstash = flg; 830 break; 831 case MAT_SYMMETRIC: 832 case MAT_STRUCTURALLY_SYMMETRIC: 833 case MAT_HERMITIAN: 834 case MAT_SYMMETRY_ETERNAL: 835 case MAT_IGNORE_LOWER_TRIANGULAR: 836 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 837 break; 838 default: 839 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 840 } 841 PetscFunctionReturn(0); 842 } 843 844 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatDiagonalScale_MPIDense" 847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 848 { 849 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 850 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 851 PetscScalar *l,*r,x,*v; 852 PetscErrorCode ierr; 853 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n; 854 855 PetscFunctionBegin; 856 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 857 if (ll) { 858 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 859 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 860 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 861 for (i=0; i<m; i++) { 862 x = l[i]; 863 v = mat->v + i; 864 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 865 } 866 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 
867 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 868 } 869 if (rr) { 870 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 871 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 872 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 875 for (i=0; i<n; i++) { 876 x = r[i]; 877 v = mat->v + i*m; 878 for (j=0; j<m; j++) { (*v++) *= x;} 879 } 880 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 881 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatNorm_MPIDense" 888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 889 { 890 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 891 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 892 PetscErrorCode ierr; 893 PetscInt i,j; 894 PetscReal sum = 0.0; 895 PetscScalar *v = mat->v; 896 897 PetscFunctionBegin; 898 if (mdn->size == 1) { 899 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 900 } else { 901 if (type == NORM_FROBENIUS) { 902 for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) { 903 #if defined(PETSC_USE_COMPLEX) 904 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 905 #else 906 sum += (*v)*(*v); v++; 907 #endif 908 } 909 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 910 *nrm = sqrt(*nrm); 911 ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr); 912 } else if (type == NORM_1) { 913 PetscReal *tmp,*tmp2; 914 ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 915 tmp2 = tmp + A->cmap.N; 916 ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 917 *nrm = 0.0; 918 v = mat->v; 919 for (j=0; j<mdn->A->cmap.n; j++) { 920 for (i=0; i<mdn->A->rmap.n; i++) { 921 tmp[j] += PetscAbsScalar(*v); v++; 922 } 923 } 924 ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 925 for (j=0; j<A->cmap.N; j++) { 926 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 927 } 928 ierr = PetscFree(tmp);CHKERRQ(ierr); 929 ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr); 930 } else if (type == NORM_INFINITY) { /* max row norm */ 931 PetscReal ntemp; 932 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 933 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 934 } else { 935 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 936 } 937 } 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatTranspose_MPIDense" 943 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 944 { 945 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 946 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 947 Mat B; 948 PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart; 949 PetscErrorCode ierr; 950 PetscInt j,i; 951 PetscScalar *v; 952 953 PetscFunctionBegin; 954 if (A == *matout && M != N) { 955 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 956 } 957 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 958 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 959 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 960 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 961 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 962 } else { 963 B = *matout; 964 } 965 966 m = a->A->rmap.n; n = 
a->A->cmap.n; v = Aloc->v; 967 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 968 for (i=0; i<m; i++) rwork[i] = rstart + i; 969 for (j=0; j<n; j++) { 970 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 971 v += m; 972 } 973 ierr = PetscFree(rwork);CHKERRQ(ierr); 974 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 975 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 if (*matout != A) { 977 *matout = B; 978 } else { 979 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 980 } 981 PetscFunctionReturn(0); 982 } 983 984 #include "petscblaslapack.h" 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatScale_MPIDense" 987 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 988 { 989 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 990 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 991 PetscScalar oalpha = alpha; 992 PetscErrorCode ierr; 993 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N); 994 995 PetscFunctionBegin; 996 BLASscal_(&nz,&oalpha,a->v,&one); 997 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1005 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1006 { 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1016 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1017 { 1018 PetscErrorCode ierr; 1019 PetscInt m=A->rmap.n,n=B->cmap.n; 1020 Mat Cmat; 1021 1022 PetscFunctionBegin; 1023 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 1024 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1025 ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr); 1026 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1027 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 1028 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1029 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1030 *C = Cmat; 1031 PetscFunctionReturn(0); 1032 } 1033 1034 #if defined(PETSC_HAVE_PLAPACK) 1035 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatMPIDenseCreatePlapack" 1038 PetscErrorCode MatMPIDenseCreatePlapack(Mat A) 1039 { 1040 Mat_Plapack *lu; 1041 PetscErrorCode ierr; 1042 PetscInt M=A->rmap.N; 1043 MPI_Comm comm=((PetscObject)A)->comm; 1044 PetscMPIInt size; 1045 1046 PetscFunctionBegin; 1047 lu = (Mat_Plapack*)(A->spptr); 1048 1049 /* Set default Plapack parameters */ 1050 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1051 lu->nb = M/size; 1052 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1053 1054 /* Set runtime options */ 1055 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1056 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1057 PetscOptionsEnd(); 1058 1059 /* Create object distribution template */ 1060 lu->templ = NULL; 1061 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1062 1063 /* Set the datatype */ 1064 #if defined(PETSC_USE_COMPLEX) 1065 lu->datatype = MPI_DOUBLE_COMPLEX; 1066 #else 1067 lu->datatype = MPI_DOUBLE; 1068 #endif 1069 1070 
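  /* Create the M x M PLAPACK matrix object (lu->A), distributed according to the
     template created above; it later receives a copy of the PETSc matrix and holds the factor */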
  ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);

  lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->CleanUpPlapack = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M,&one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
  ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);

  /* Copy b into v_pla */
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);

  if (A->factor == FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    ierr = PLA_Apply_pivots_to_rows(v_pla,lu->pivots);CHKERRQ(ierr);

    /* Solve L y = b, overwriting b with y */
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);

    /* Solve U x = y (=b), overwriting b with x */
    ierr = PLA_Trsv(PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
  } else { /* FACTOR_CHOLESKY */
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ?
PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1145 } 1146 1147 /* Copy PLAPACK x into Petsc vector x */ 1148 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1149 ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr); 1150 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1151 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1152 if (!lu->pla_solved){ 1153 1154 ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr); 1155 ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr); 1156 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1157 1158 /* Create IS and cts for VecScatterring */ 1159 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1160 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1161 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1162 idx_petsc = idx_pla + loc_m; 1163 1164 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1165 for (i=0; i<loc_m; i+=lu->nb){ 1166 j = 0; 1167 while (j < lu->nb && i+j < loc_m){ 1168 idx_petsc[i+j] = rstart + j; j++; 1169 } 1170 rstart += size*lu->nb; 1171 } 1172 1173 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1174 1175 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1176 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1177 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1178 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1179 } 1180 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1181 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182 1183 /* Free data */ 1184 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1185 ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr); 1186 1187 lu->pla_solved = PETSC_TRUE; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1193 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1194 { 1195 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1196 PetscErrorCode ierr; 1197 PetscInt info_pla; 1198 1199 PetscFunctionBegin; 1200 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1201 1202 /* Copy A into F->lu->A */ 1203 ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr); 1204 1205 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1206 info_pla = PLA_LU(lu->A,lu->pivots); 1207 if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1208 1209 (*F)->assembled = PETSC_TRUE; 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1215 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1216 { 1217 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1218 PetscErrorCode ierr; 1219 PetscInt info_pla; 1220 1221 PetscFunctionBegin; 1222 1223 /* Copy A into F->lu->A */ 1224 ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr); 1225 1226 /* Factor P A -> Chol */ 1227 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1228 if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1229 1230 (*F)->assembled = PETSC_TRUE; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ 
"MatFactorSymbolic_MPIDense_Private" 1237 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1238 { 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 /* Create the factorization matrix */ 1243 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1244 ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 1245 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1246 ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 /* Note the Petsc r and c permutations are ignored */ 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1253 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1254 { 1255 PetscErrorCode ierr; 1256 PetscInt M = A->rmap.N; 1257 Mat_Plapack *lu; 1258 1259 PetscFunctionBegin; 1260 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1261 lu = (Mat_Plapack*)(*F)->spptr; 1262 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1263 (*F)->factor = FACTOR_LU; 1264 PetscFunctionReturn(0); 1265 } 1266 1267 /* Note the Petsc perm permutation is ignored */ 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1270 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1271 { 1272 PetscErrorCode ierr; 1273 PetscTruth issymmetric,set; 1274 1275 PetscFunctionBegin; 1276 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1277 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1278 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1279 (*F)->factor = FACTOR_CHOLESKY; 1280 PetscFunctionReturn(0); 1281 } 1282 #endif 1283 1284 /* -------------------------------------------------------------------*/ 1285 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1286 MatGetRow_MPIDense, 1287 MatRestoreRow_MPIDense, 1288 MatMult_MPIDense, 1289 /* 4*/ MatMultAdd_MPIDense, 1290 MatMultTranspose_MPIDense, 1291 MatMultTransposeAdd_MPIDense, 1292 #if defined(PETSC_HAVE_PLAPACK) 1293 MatSolve_MPIDense, 1294 #else 1295 0, 1296 #endif 1297 0, 1298 0, 1299 /*10*/ 0, 1300 0, 1301 0, 1302 0, 1303 MatTranspose_MPIDense, 1304 /*15*/ MatGetInfo_MPIDense, 1305 MatEqual_MPIDense, 1306 MatGetDiagonal_MPIDense, 1307 MatDiagonalScale_MPIDense, 1308 MatNorm_MPIDense, 1309 /*20*/ MatAssemblyBegin_MPIDense, 1310 MatAssemblyEnd_MPIDense, 1311 0, 1312 MatSetOption_MPIDense, 1313 MatZeroEntries_MPIDense, 1314 /*25*/ MatZeroRows_MPIDense, 1315 #if defined(PETSC_HAVE_PLAPACK) 1316 MatLUFactorSymbolic_MPIDense, 1317 MatLUFactorNumeric_MPIDense, 1318 MatCholeskyFactorSymbolic_MPIDense, 1319 MatCholeskyFactorNumeric_MPIDense, 1320 #else 1321 0, 1322 0, 1323 0, 1324 0, 1325 #endif 1326 /*30*/ MatSetUpPreallocation_MPIDense, 1327 0, 1328 0, 1329 MatGetArray_MPIDense, 1330 MatRestoreArray_MPIDense, 1331 /*35*/ MatDuplicate_MPIDense, 1332 0, 1333 0, 1334 0, 1335 0, 1336 /*40*/ 0, 1337 MatGetSubMatrices_MPIDense, 1338 0, 1339 MatGetValues_MPIDense, 1340 0, 1341 /*45*/ 0, 1342 MatScale_MPIDense, 1343 0, 1344 0, 1345 0, 1346 /*50*/ 0, 1347 0, 1348 0, 1349 0, 1350 0, 1351 /*55*/ 0, 1352 0, 1353 0, 1354 0, 1355 0, 1356 /*60*/ MatGetSubMatrix_MPIDense, 1357 MatDestroy_MPIDense, 1358 MatView_MPIDense, 1359 0, 1360 0, 1361 /*65*/ 0, 1362 0, 1363 0, 1364 0, 1365 0, 1366 /*70*/ 0, 1367 0, 1368 0, 1369 0, 1370 0, 1371 /*75*/ 0, 1372 0, 1373 0, 
1374 0, 1375 0, 1376 /*80*/ 0, 1377 0, 1378 0, 1379 0, 1380 /*84*/ MatLoad_MPIDense, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 /*90*/ 0, 1387 MatMatMultSymbolic_MPIDense_MPIDense, 1388 0, 1389 0, 1390 0, 1391 /*95*/ 0, 1392 0, 1393 0, 1394 0}; 1395 1396 EXTERN_C_BEGIN 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1399 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1400 { 1401 Mat_MPIDense *a; 1402 PetscErrorCode ierr; 1403 1404 PetscFunctionBegin; 1405 mat->preallocated = PETSC_TRUE; 1406 /* Note: For now, when data is specified above, this assumes the user correctly 1407 allocates the local dense storage space. We should add error checking. */ 1408 1409 a = (Mat_MPIDense*)mat->data; 1410 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1411 ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr); 1412 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1413 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1414 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 EXTERN_C_END 1418 1419 EXTERN_C_BEGIN 1420 #if defined(PETSC_HAVE_PLAPACK) 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK" 1423 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type) 1424 { 1425 PetscFunctionBegin; 1426 /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */ 1427 PetscFunctionReturn(0); 1428 } 1429 #endif 1430 EXTERN_C_END 1431 1432 /*MC 1433 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1434 1435 Options Database Keys: 1436 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1437 1438 Level: beginner 1439 1440 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1441 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1442 (run config/configure.py with the option --download-plapack) 1443 1444 1445 Options Database Keys: 1446 . -mat_plapack_nprows <n> - number of rows in processor partition 1447 . -mat_plapack_npcols <n> - number of columns in processor partition 1448 . -mat_plapack_nb <n> - block size of template vector 1449 . 
-mat_plapack_nb_alg <n> - algorithmic block size 1450 - -mat_plapack_ckerror <n> - error checking flag 1451 1452 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1453 M*/ 1454 1455 EXTERN_C_BEGIN 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatCreate_MPIDense" 1458 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1459 { 1460 Mat_MPIDense *a; 1461 PetscErrorCode ierr; 1462 #if defined(PETSC_HAVE_PLAPACK) 1463 Mat_Plapack *lu; 1464 #endif 1465 1466 PetscFunctionBegin; 1467 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1468 mat->data = (void*)a; 1469 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1470 mat->factor = 0; 1471 mat->mapping = 0; 1472 1473 mat->insertmode = NOT_SET_VALUES; 1474 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1475 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1476 1477 mat->rmap.bs = mat->cmap.bs = 1; 1478 ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr); 1479 ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr); 1480 a->nvec = mat->cmap.n; 1481 1482 /* build cache for off array entries formed */ 1483 a->donotstash = PETSC_FALSE; 1484 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1485 1486 /* stuff used for matrix vector multiply */ 1487 a->lvec = 0; 1488 a->Mvctx = 0; 1489 a->roworiented = PETSC_TRUE; 1490 1491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1492 "MatGetDiagonalBlock_MPIDense", 1493 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1494 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1495 "MatMPIDenseSetPreallocation_MPIDense", 1496 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1497 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1498 "MatMatMult_MPIAIJ_MPIDense", 1499 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1500 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1501 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1502 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1503 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1504 "MatMatMultNumeric_MPIAIJ_MPIDense", 1505 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1506 #if defined(PETSC_HAVE_PLAPACK) 1507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C", 1508 "MatSetSolverType_MPIDense_PLAPACK", 1509 MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr); 1510 #endif 1511 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1512 1513 #if defined(PETSC_HAVE_PLAPACK) 1514 ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr); 1515 lu->CleanUpPlapack = PETSC_FALSE; 1516 mat->spptr = (void*)lu; 1517 #endif 1518 PetscFunctionReturn(0); 1519 } 1520 EXTERN_C_END 1521 1522 /*MC 1523 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1524 1525 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1526 and MATMPIDENSE otherwise. 1527 1528 Options Database Keys: 1529 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner


.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not Collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
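
   A minimal call sequence (illustrative sketch only; the communicator comm and the
   global sizes M and N are assumed to be defined by the caller, and error checking
   is abbreviated):
.vb
      Mat         C;
      PetscInt    row = 0, col = 0;
      PetscScalar val = 1.0;
      ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&C);CHKERRQ(ierr);
      ierr = MatSetValues(C,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
.ve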
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
          for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
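/*
   Usage sketch for MatCreateMPIDense() (illustrative; the 8x8 size and the values are hypothetical):

       Mat         A;
       PetscInt    i,j,Istart,Iend;
       PetscScalar v = 1.0;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
       for (i=Istart; i<Iend; i++) {
         for (j=0; j<8; j++) {
           ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(A);CHKERRQ(ierr);
*/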
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
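/*
   Note on the row partitioning used by both loaders (a worked example, not part of the original
   source): each process is assigned  m = M/size + ((M % size) > rank)  rows, so any remainder
   rows go to the lowest ranks.  For M = 10 rows on size = 4 processes this gives local sizes
   3, 3, 2, 2 on ranks 0..3, and the prefix sums accumulated into rowners[] are 0, 3, 6, 8, 10.
*/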
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
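/*
   Sketch of how MatLoad_MPIDense() would typically be reached (illustrative; the file name is
   hypothetical).  The binary file begins with the header read above: header[0] is the matrix
   cookie, header[1] = M, header[2] = N, and header[3] is either the total number of nonzeros
   (row-compressed storage) or MATRIX_BINARY_FORMAT_DENSE, which routes the load to
   MatLoad_MPIDense_DenseInFile().

       Mat         A;
       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/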
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
  called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
  when using static libraries.

  Input Parameter:
. path - The dynamic library path, or PETSC_NULL

  Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
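/*
   Worked example for the PLAPACK process-grid options handled in PetscPLAPACKInitializePackage()
   above (illustrative, not part of the original source): by default the 2D mesh is 1 x size,
   i.e. Plapack_nprows = 1 and Plapack_npcols = size.  On 6 processes the options

       -plapack_nprows 2 -plapack_npcols 3

   instead request a 2 x 3 mesh, which PLA_Comm_1D_to_2D() turns into the communicator
   Plapack_comm_2d passed to PLA_Init().
*/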