#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/


#include "src/mat/impls/dense/mpi/mpidense.h"      /*I "petscmat.h" I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm Plapack_comm_2d;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

    MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
    matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.    A - the Seq or MPI dense matrix

    Output Parameter:
.    B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
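/*
   Illustrative sketch (not part of the original source): typical caller-side use of the
   value-insertion path above. Locally owned rows go straight into the inner Mat_SeqDense;
   rows owned by other processes are stashed and shipped during assembly. The matrix name,
   sizes and indices below are hypothetical.

     Mat A;
     PetscInt row = 0,cols[2] = {0,1};
     PetscScalar vals[2] = {1.0,2.0};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,PETSC_NULL);
     MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);   (row 0 may be off-process; then it is stashed)
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);             (stashed entries are sent to their owners)
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(A);
*/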

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix with the same partitioning as the
     original matrix!
  */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
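/*
   Illustrative sketch (not part of the original source): caller-side use of MatZeroRows()
   on a parallel dense matrix, e.g. to impose Dirichlet boundary rows. Each process may pass
   global indices it does not own; the routine above ships every index to its owning process
   before zeroing. The row list and diagonal value below are hypothetical.

     PetscInt rows[2] = {0,5};
     PetscScalar diag = 1.0;

     MatZeroRows(A,2,rows,diag);   (zero global rows 0 and 5, placing 1.0 on their diagonals)
*/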
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap.n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        v = vv;
        for (k=1; k<size; k++) {
          m = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
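/*
   Illustrative sketch (not part of the original source): saving an MPI dense matrix through
   a binary viewer, which dispatches to MatView_MPIDense_Binary() above. The file name is
   hypothetical.

     PetscViewer viewer;

     PetscViewerBinaryOpen(((PetscObject)A)->comm,"dense.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);
     PetscViewerDestroy(viewer);
*/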

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCreatePlapack"
PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
{
  Mat_Plapack    *lu;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N;
  MPI_Comm       comm=((PetscObject)A)->comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  if (A->spptr) PetscFunctionReturn(0);
  ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
  A->spptr = (void*)lu;

  lu = (Mat_Plapack*)(A->spptr);

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy the PLAPACK matrix F->lu->A back into the PETSc matrix A */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A);//CHKERRQ(ierr);
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
  ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);

  /* Copy b into rhs_pla */
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);

  if (A->factor == FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);

    /* Solve L y = b, overwriting b with y */
    ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);

    /* Solve U x = y (=b), overwriting b with x */
    ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
  } else { /* FACTOR_CHOLESKY */
    ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
    ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
  }

  /* Copy PLAPACK x into Petsc vector x */
  ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
  ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
  ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
    ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
    /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */

    /* Create IS and ctx for VecScattering */
    ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
    ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       info_pla;

  PetscFunctionBegin;
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);

  /* Copy A into F->lu->A */
  ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  (*F)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       info_pla;

  PetscFunctionBegin;

  /* Copy A into F->lu->A */
  ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  (*F)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
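/*
   Illustrative sketch (not part of the original source): how the PLAPACK-backed
   factorization above is normally reached through the generic PETSc interface of this
   PETSc generation. The index sets r,c and the vectors b,x are hypothetical; r and c are
   ignored by this implementation (see the note before MatLUFactorSymbolic_MPIDense below).

     Mat F;
     MatFactorInfo info;

     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(A,r,c,&info,&F);
     MatLUFactorNumeric(A,&info,&F);       (calls MatLUFactorNumeric_MPIDense())
     MatSolve(F,b,x);                      (calls MatSolve_MPIDense())
     MatDestroy(F);
*/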
"MatFactorSymbolic_MPIDense_Private" 1319 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1320 { 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 /* Create the factorization matrix */ 1325 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1326 ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 1327 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1328 ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 /* Note the Petsc r and c permutations are ignored */ 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1335 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1336 { 1337 PetscErrorCode ierr; 1338 PetscInt M = A->rmap.N; 1339 Mat_Plapack *lu; 1340 1341 PetscFunctionBegin; 1342 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1343 lu = (Mat_Plapack*)(*F)->spptr; 1344 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1345 (*F)->factor = FACTOR_LU; 1346 PetscFunctionReturn(0); 1347 } 1348 1349 /* Note the Petsc perm permutation is ignored */ 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1352 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1353 { 1354 PetscErrorCode ierr; 1355 PetscTruth issymmetric,set; 1356 1357 PetscFunctionBegin; 1358 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1359 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1360 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1361 (*F)->factor = FACTOR_CHOLESKY; 1362 PetscFunctionReturn(0); 1363 } 1364 #endif 1365 1366 /* -------------------------------------------------------------------*/ 1367 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1368 MatGetRow_MPIDense, 1369 MatRestoreRow_MPIDense, 1370 MatMult_MPIDense, 1371 /* 4*/ MatMultAdd_MPIDense, 1372 MatMultTranspose_MPIDense, 1373 MatMultTransposeAdd_MPIDense, 1374 #if defined(PETSC_HAVE_PLAPACK) 1375 MatSolve_MPIDense, 1376 #else 1377 0, 1378 #endif 1379 0, 1380 0, 1381 /*10*/ 0, 1382 0, 1383 0, 1384 0, 1385 MatTranspose_MPIDense, 1386 /*15*/ MatGetInfo_MPIDense, 1387 MatEqual_MPIDense, 1388 MatGetDiagonal_MPIDense, 1389 MatDiagonalScale_MPIDense, 1390 MatNorm_MPIDense, 1391 /*20*/ MatAssemblyBegin_MPIDense, 1392 MatAssemblyEnd_MPIDense, 1393 0, 1394 MatSetOption_MPIDense, 1395 MatZeroEntries_MPIDense, 1396 /*25*/ MatZeroRows_MPIDense, 1397 #if defined(PETSC_HAVE_PLAPACK) 1398 MatLUFactorSymbolic_MPIDense, 1399 MatLUFactorNumeric_MPIDense, 1400 MatCholeskyFactorSymbolic_MPIDense, 1401 MatCholeskyFactorNumeric_MPIDense, 1402 #else 1403 0, 1404 0, 1405 0, 1406 0, 1407 #endif 1408 /*30*/ MatSetUpPreallocation_MPIDense, 1409 0, 1410 0, 1411 MatGetArray_MPIDense, 1412 MatRestoreArray_MPIDense, 1413 /*35*/ MatDuplicate_MPIDense, 1414 0, 1415 0, 1416 0, 1417 0, 1418 /*40*/ 0, 1419 MatGetSubMatrices_MPIDense, 1420 0, 1421 MatGetValues_MPIDense, 1422 0, 1423 /*45*/ 0, 1424 MatScale_MPIDense, 1425 0, 1426 0, 1427 0, 1428 /*50*/ 0, 1429 0, 1430 0, 1431 0, 1432 0, 1433 /*55*/ 0, 1434 0, 1435 0, 1436 0, 1437 0, 1438 /*60*/ MatGetSubMatrix_MPIDense, 1439 MatDestroy_MPIDense, 1440 MatView_MPIDense, 1441 0, 1442 0, 1443 /*65*/ 0, 1444 0, 1445 0, 1446 0, 1447 0, 1448 /*70*/ 0, 1449 0, 1450 0, 1451 0, 1452 0, 1453 /*75*/ 0, 1454 0, 1455 0, 
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
{
  PetscFunctionBegin;
  /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */
  PetscFunctionReturn(0);
}
#endif
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR)
   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
   (run config/configure.py with the option --download-plapack)


   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
M*/
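/*
   Illustrative sketch (not part of the original source): creating a MATMPIDENSE matrix as
   described in the manual page above, letting the PLAPACK options take effect from the
   command line, e.g.

       ./prog -mat_type mpidense -mat_plapack_nb 64

   The global sizes below are hypothetical.

     Mat A;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetFromOptions(A);                        (picks up -mat_type mpidense)
     MatMPIDenseSetPreallocation(A,PETSC_NULL);
*/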

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor = 0;
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                     MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                     MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                     MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
                                     "MatSetSolverType_MPIDense_PLAPACK",
                                     MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/
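/*
   Illustrative sketch (not part of the original source): MATDENSE defers to MatCreate_Dense()
   below, which selects MATSEQDENSE on one process and MATMPIDENSE otherwise, so the same
   calling code runs sequentially and in parallel. The names comm, A, M and N are hypothetical.

     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATDENSE);   (becomes "seqdense" or "mpidense" depending on the communicator)
*/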

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
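/*
   Usage sketch (not part of the original source): supplying user-allocated storage
   through MatMPIDenseSetPreallocation().  Judging from
   MatMPIDenseSetPreallocation_MPIDense() above, each process stores its m local
   rows against all N global columns in column-major (Fortran) order with leading
   dimension m, so the array below is sized m*N under that assumption; the values
   of m and N are illustrative.

      Mat            A;
      PetscScalar    *data;
      PetscInt       m = 4,N = 20;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/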

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
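/*
   Usage sketch (not part of the original source): the convenience constructor above
   collapses the MatCreate()/MatSetSizes()/MatSetType()/preallocation sequence into
   one call; the global sizes here are illustrative and PETSC_NULL lets PETSc manage
   the storage.

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
*/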

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A    = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
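/*
   Usage sketch (not part of the original source): loading a matrix stored in PETSc
   binary format into a parallel dense matrix.  The file name "matrix.dat" is made up;
   PetscViewerBinaryOpen() and MatLoad() are the standard entry points that end up
   calling MatLoad_MPIDense() above for the MATMPIDENSE type.

      PetscViewer    viewer;
      Mat            A;
      PetscErrorCode ierr;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/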

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
  when using static libraries.

  Input Parameter:
. path - The dynamic library path, or PETSC_NULL

  Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
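/*
   Runtime sketch (not part of the original source): the PLAPACK interface above is
   configured entirely through the options database.  A hypothetical run of an
   application ./app on four processes with a 2 x 2 PLAPACK process mesh and an
   algorithmic block size of 128 might look like

      mpiexec -n 4 ./app -mat_type mpidense -plapack_nprows 2 -plapack_npcols 2 -plapack_nb_alg 128

   Only the option names registered in PetscPLAPACKInitializePackage() and in the
   MATMPIDENSE man page are assumed; the launcher and executable name are illustrative.
*/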