#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/


#include "src/mat/impls/dense/mpi/mpidense.h"   /*I  "petscmat.h"  I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the inner
          sequential matrix that holds the locally stored part of the operator. For a
          sequential matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
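
/*
   Illustrative usage sketch (a comment only, not compiled): code that needs direct access to
   the locally stored dense values can obtain the inner sequential matrix first, whether the
   outer matrix is MATSEQDENSE or MATMPIDENSE.  The variable names below are hypothetical.

     Mat         A,Aloc;
     PetscScalar *array;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);        column-major values of the local rows
     ...
     ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/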

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrices with the same row
     partitioning as the original matrix!
  */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the row and column ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap.n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k=1; k<size; k++) {
          v    = vv;   /* reset to the start of the receive buffer for each incoming process */
          m    = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCreatePlapack"
PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
{
  Mat_Plapack    *lu;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N;
  MPI_Comm       comm=((PetscObject)A)->comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  if (A->spptr) PetscFunctionReturn(0);
  ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
  A->spptr = (void*)lu;

  lu = (Mat_Plapack*)(A->spptr);

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy the PLAPACK matrix held by F back into the PETSc matrix A */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);

  /* do the multiply in PLA */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A);//CHKERRQ(ierr);
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

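/*
   Illustrative sketch (a comment only, not compiled) of how the PLAPACK-backed solve and
   factorization routines below are normally driven through the generic PETSc interface,
   assuming a square, assembled MATMPIDENSE matrix A and conforming vectors b and x:

     Mat           F;
     IS            r,c;                       any row/column orderings; ignored by these routines
     MatFactorInfo info;

     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(A,r,c,&info,&F);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(A,&info,&F);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(F);CHKERRQ(ierr);

   For a Cholesky factorization the matrix must first be flagged with
   MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE), as checked in MatCholeskyFactorSymbolic_MPIDense().
*/
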
#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
  ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);

  /* Copy b into rhs_pla */
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);

  if (A->factor == FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);

    /* Solve L y = b, overwriting b with y */
    ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);

    /* Solve U x = y (=b), overwriting b with x */
    ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
  } else { /* FACTOR_CHOLESKY */
    ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
    ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
  }

  /* Copy PLAPACK x into PETSc vector x */
  ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
  ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
  ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
    ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
    /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */

    /* Create the IS and ctx for the VecScatter */
    ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
    ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       info_pla;

  PetscFunctionBegin;
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);

  /* Copy A into F->lu->A */
  ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  (*F)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       info_pla;

  PetscFunctionBegin;

  /* Copy A into F->lu->A */
  ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  (*F)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  lu   = (Mat_Plapack*)(*F)->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  (*F)->factor = FACTOR_LU;
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  (*F)->factor = FACTOR_CHOLESKY;
  PetscFunctionReturn(0);
}
#endif

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatSolve_MPIDense,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
#else
       0,
       0,
       0,
       0,
#endif
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
{
  PetscFunctionBegin;
  /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") silently works (since PLAPACK is on by default) */
  PetscFunctionReturn(0);
}
#endif
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) for parallel dense matrices
   via the external package PLAPACK, if PLAPACK is installed (run config/configure.py with the
   option --download-plapack).


   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
M*/
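
/*
   Illustrative example (a comment only, not compiled): selecting this type and the PLAPACK
   block size at runtime; "ex1" is a hypothetical executable and M,N are hypothetical sizes.

     mpiexec -n 4 ./ex1 -mat_type mpidense -mat_plapack_nb 64

   In code, the type can also be requested directly and the remaining options picked up
   from the options database:

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/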

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr          = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data     = (void*)a;
  ierr          = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor   = 0;
  mat->mapping  = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
                                           "MatSetSolverType_MPIDense_PLAPACK",
                                           MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner


.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
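
/*
   Illustrative sketch (a comment only, not compiled): requesting the generic MATDENSE type
   lets MatCreate_Dense() above pick MATSEQDENSE on one process and MATMPIDENSE on several;
   M and N are hypothetical global sizes.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual ...
*/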

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
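
/*
   Illustrative sketch of supplying user-allocated storage through MatMPIDenseSetPreallocation();
   nlocal (the local row count) and N (the global column count) are assumed to be defined by the
   caller, and the array must hold nlocal*N entries stored by columns.

      Mat         A;
      PetscScalar *v;

      ierr = PetscMalloc(nlocal*N*sizeof(PetscScalar),&v);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,nlocal,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,v);CHKERRQ(ierr);

   Most users instead pass data = PETSC_NULL and let PETSc allocate the storage, as noted in the
   manual page above.
*/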

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
          for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
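
/*
   Illustrative sketch, assuming M and N are the desired global dimensions: the convenience
   routine above collapses the create/set-type/preallocate sequence into a single call.

      Mat A;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ... fill with MatSetValues(), assemble, use ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/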

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
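
/*
   Illustrative sketch of how the loaders are normally reached: both MatLoad_MPIDense_DenseInFile()
   above and MatLoad_MPIDense() below are invoked through MatLoad() on a binary viewer. The file
   name "matrix.dat" is a placeholder.

      PetscViewer viewer;
      Mat         A;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/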

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A    = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

   Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK.
   It is called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to
   PetscInitialize() when using static libraries.

   Input Parameter:
.  path - The dynamic library path, or PETSC_NULL

   Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
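
/*
   Illustrative note on the runtime options parsed by PetscPLAPACKInitializePackage(); the
   executable name and the particular values below are placeholders.

      mpiexec -n 4 ./app -plapack_nprows 2 -plapack_npcols 2 -plapack_nb_alg 64

   By default the 2D processor mesh is 1 x size, so explicitly chosen values of -plapack_nprows
   and -plapack_npcols would normally multiply to the number of processes in the communicator.
*/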