1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 #endif 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatDenseGetLocalMatrix" 16 /*@ 17 18 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 19 matrix that represents the operator. For sequential matrices it returns itself. 20 21 Input Parameter: 22 . A - the Seq or MPI dense matrix 23 24 Output Parameter: 25 . B - the inner matrix 26 27 Level: intermediate 28 29 @*/ 30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 31 { 32 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 33 PetscErrorCode ierr; 34 PetscTruth flg; 35 36 PetscFunctionBegin; 37 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 38 if (flg) { 39 *B = mat->A; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatGetRow_MPIDense" 48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 49 { 50 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 51 PetscErrorCode ierr; 52 PetscInt lrow,rstart = A->rmap.rstart,rend = A->rmap.rend; 53 54 PetscFunctionBegin; 55 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 56 lrow = row - rstart; 57 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRestoreRow_MPIDense" 63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 69 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 70 PetscFunctionReturn(0); 71 } 72 73 EXTERN_C_BEGIN 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 79 PetscErrorCode ierr; 80 PetscInt m = A->rmap.n,rstart = A->rmap.rstart; 81 PetscScalar *array; 82 MPI_Comm comm; 83 84 PetscFunctionBegin; 85 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 86 87 /* The reuse aspect is not implemented efficiently */ 88 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 89 90 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 91 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatCreate(comm,B);CHKERRQ(ierr); 93 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 94 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 95 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 96 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 97 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 100 *iscopy = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIDense" 107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt i,j,rstart = 
mat->rmap.rstart,rend = mat->rmap.rend,row; 112 PetscTruth roworiented = A->roworiented; 113 114 PetscFunctionBegin; 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 if (roworiented) { 132 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 133 } else { 134 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 135 } 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetValues_MPIDense" 144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 145 { 146 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 149 150 PetscFunctionBegin; 151 for (i=0; i<m; i++) { 152 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 153 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 154 if (idxm[i] >= rstart && idxm[i] < rend) { 155 row = idxm[i] - rstart; 156 for (j=0; j<n; j++) { 157 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 158 if (idxn[j] >= mat->cmap.N) { 159 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 } 161 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 162 } 163 } else { 164 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 165 } 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetArray_MPIDense" 172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 185 { 186 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 187 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 188 PetscErrorCode ierr; 189 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 190 PetscScalar *av,*bv,*v = lmat->v; 191 Mat newmat; 192 193 PetscFunctionBegin; 194 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 195 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 196 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 198 199 /* No parallel redistribution currently supported! Should really check each index set 200 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 201 original matrix! 
*/ 202 203 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 204 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 205 206 /* Check submatrix call */ 207 if (scall == MAT_REUSE_MATRIX) { 208 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 209 /* Really need to test rows and column sizes! */ 210 newmat = *B; 211 } else { 212 /* Create and fill new matrix */ 213 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 214 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 215 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 216 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 217 } 218 219 /* Now extract the data pointers and do the copy, column at a time */ 220 newmatd = (Mat_MPIDense*)newmat->data; 221 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 222 223 for (i=0; i<ncols; i++) { 224 av = v + nlrows*icol[i]; 225 for (j=0; j<nrows; j++) { 226 *bv++ = av[irow[j] - rstart]; 227 } 228 } 229 230 /* Assemble the matrices so that the correct flags are set */ 231 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 234 /* Free work space */ 235 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 236 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 237 *B = newmat; 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatRestoreArray_MPIDense" 243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 244 { 245 PetscFunctionBegin; 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 252 { 253 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 254 MPI_Comm comm = ((PetscObject)mat)->comm; 255 PetscErrorCode ierr; 256 PetscInt nstash,reallocs; 257 InsertMode addv; 258 259 PetscFunctionBegin; 260 /* make sure all processors are either in INSERTMODE or ADDMODE */ 261 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 262 if (addv == (ADD_VALUES|INSERT_VALUES)) { 263 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 264 } 265 mat->insertmode = addv; /* in case this processor had no cache */ 266 267 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 268 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 269 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 276 { 277 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,*row,*col,flg,j,rstart,ncols; 280 PetscMPIInt n; 281 PetscScalar *val; 282 InsertMode addv=mat->insertmode; 283 284 PetscFunctionBegin; 285 /* wait on receives */ 286 while (1) { 287 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 288 if (!flg) break; 289 290 for (i=0; i<n;) { 291 /* Now identify the consecutive vals belonging to the same row */ 292 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 293 if (j < n) ncols = j-i; 294 else ncols = n-i; 295 /* Now assemble all these values with a single function call */ 296 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 297 i = j; 298 } 
299 } 300 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 301 302 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 303 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 304 305 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 306 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 307 } 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "MatZeroEntries_MPIDense" 313 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 314 { 315 PetscErrorCode ierr; 316 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 317 318 PetscFunctionBegin; 319 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 /* the code does not do the diagonal entries correctly unless the 324 matrix is square and the column and row owerships are identical. 325 This is a BUG. The only way to fix it seems to be to access 326 mdn->A and mdn->B directly and not through the MatZeroRows() 327 routine. 328 */ 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatZeroRows_MPIDense" 331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 332 { 333 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 334 PetscErrorCode ierr; 335 PetscInt i,*owners = A->rmap.range; 336 PetscInt *nprocs,j,idx,nsends; 337 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 338 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 339 PetscInt *lens,*lrows,*values; 340 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 341 MPI_Comm comm = ((PetscObject)A)->comm; 342 MPI_Request *send_waits,*recv_waits; 343 MPI_Status recv_status,*send_status; 344 PetscTruth found; 345 346 PetscFunctionBegin; 347 /* first count number of contributors to each processor */ 348 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 349 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 351 for (i=0; i<N; i++) { 352 idx = rows[i]; 353 found = PETSC_FALSE; 354 for (j=0; j<size; j++) { 355 if (idx >= owners[j] && idx < owners[j+1]) { 356 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 357 } 358 } 359 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 360 } 361 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 362 363 /* inform other processors of number of messages and max length*/ 364 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 365 366 /* post receives: */ 367 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 368 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 369 for (i=0; i<nrecvs; i++) { 370 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 371 } 372 373 /* do sends: 374 1) starts[i] gives the starting index in svalues for stuff going to 375 the ith processor 376 */ 377 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 378 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 379 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 380 starts[0] = 0; 381 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 382 for (i=0; i<N; i++) { 383 svalues[starts[owner[i]]++] = rows[i]; 384 } 385 386 starts[0] = 0; 387 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 388 count = 0; 389 for (i=0; i<size; i++) { 390 if (nprocs[2*i+1]) { 391 ierr = 
MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscFree(starts);CHKERRQ(ierr); 395 396 base = owners[rank]; 397 398 /* wait on receives */ 399 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 400 source = lens + nrecvs; 401 count = nrecvs; slen = 0; 402 while (count) { 403 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 404 /* unpack receives into our local space */ 405 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 406 source[imdex] = recv_status.MPI_SOURCE; 407 lens[imdex] = n; 408 slen += n; 409 count--; 410 } 411 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 412 413 /* move the data into the send scatter */ 414 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 415 count = 0; 416 for (i=0; i<nrecvs; i++) { 417 values = rvalues + i*nmax; 418 for (j=0; j<lens[i]; j++) { 419 lrows[count++] = values[j] - base; 420 } 421 } 422 ierr = PetscFree(rvalues);CHKERRQ(ierr); 423 ierr = PetscFree(lens);CHKERRQ(ierr); 424 ierr = PetscFree(owner);CHKERRQ(ierr); 425 ierr = PetscFree(nprocs);CHKERRQ(ierr); 426 427 /* actually zap the local rows */ 428 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 429 ierr = PetscFree(lrows);CHKERRQ(ierr); 430 431 /* wait on sends */ 432 if (nsends) { 433 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 434 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 435 ierr = PetscFree(send_status);CHKERRQ(ierr); 436 } 437 ierr = PetscFree(send_waits);CHKERRQ(ierr); 438 ierr = PetscFree(svalues);CHKERRQ(ierr); 439 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatMult_MPIDense" 445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 446 { 447 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 452 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatMultAdd_MPIDense" 459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 462 PetscErrorCode ierr; 463 464 PetscFunctionBegin; 465 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatMultTranspose_MPIDense" 473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 474 { 475 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 476 PetscErrorCode ierr; 477 PetscScalar zero = 0.0; 478 479 PetscFunctionBegin; 480 ierr = VecSet(yy,zero);CHKERRQ(ierr); 481 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 482 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 483 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 490 { 491 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 492 
PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 496 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 497 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 498 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatGetDiagonal_MPIDense" 504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 507 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 508 PetscErrorCode ierr; 509 PetscInt len,i,n,m = A->rmap.n,radd; 510 PetscScalar *x,zero = 0.0; 511 512 PetscFunctionBegin; 513 ierr = VecSet(v,zero);CHKERRQ(ierr); 514 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 515 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 516 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 517 len = PetscMin(a->A->rmap.n,a->A->cmap.n); 518 radd = A->rmap.rstart*m; 519 for (i=0; i<len; i++) { 520 x[i] = aloc->v[radd + i*m + i]; 521 } 522 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatDestroy_MPIDense" 528 PetscErrorCode MatDestroy_MPIDense(Mat mat) 529 { 530 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 531 PetscErrorCode ierr; 532 #if defined(PETSC_HAVE_PLAPACK) 533 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 534 #endif 535 536 PetscFunctionBegin; 537 538 #if defined(PETSC_USE_LOG) 539 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 540 #endif 541 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 542 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 543 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 544 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 545 #if defined(PETSC_HAVE_PLAPACK) 546 if (lu) { 547 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 548 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 549 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 550 551 if (lu->is_pla) { 552 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 553 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 554 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 555 } 556 } 557 #endif 558 559 ierr = PetscFree(mdn);CHKERRQ(ierr); 560 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatView_MPIDense_Binary" 571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 572 { 573 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 574 PetscErrorCode ierr; 575 PetscViewerFormat format; 576 int fd; 577 PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k; 578 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 579 PetscScalar *work,*v,*vv; 580 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 581 MPI_Status status; 582 583 PetscFunctionBegin; 584 if 
(mdn->size == 1) { 585 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 586 } else { 587 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 588 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 589 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 590 591 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 592 if (format == PETSC_VIEWER_BINARY_NATIVE) { 593 594 if (!rank) { 595 /* store the matrix as a dense matrix */ 596 header[0] = MAT_FILE_COOKIE; 597 header[1] = mat->rmap.N; 598 header[2] = N; 599 header[3] = MATRIX_BINARY_FORMAT_DENSE; 600 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 601 602 /* get largest work array needed for transposing array */ 603 mmax = mat->rmap.n; 604 for (i=1; i<size; i++) { 605 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 606 } 607 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 608 609 /* write out local array, by rows */ 610 m = mat->rmap.n; 611 v = a->v; 612 for (j=0; j<N; j++) { 613 for (i=0; i<m; i++) { 614 work[j + i*N] = *v++; 615 } 616 } 617 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 618 /* get largest work array to receive messages from other processes, excludes process zero */ 619 mmax = 0; 620 for (i=1; i<size; i++) { 621 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 622 } 623 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 624 for(k = 1; k < size; k++) { 625 v = vv; 626 m = mat->rmap.range[k+1] - mat->rmap.range[k]; 627 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 628 629 for(j = 0; j < N; j++) { 630 for(i = 0; i < m; i++) { 631 work[j + i*N] = *v++; 632 } 633 } 634 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 635 } 636 ierr = PetscFree(work);CHKERRQ(ierr); 637 ierr = PetscFree(vv);CHKERRQ(ierr); 638 } else { 639 ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 640 } 641 } else { 642 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE"); 643 } 644 } 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 650 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 651 { 652 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 653 PetscErrorCode ierr; 654 PetscMPIInt size = mdn->size,rank = mdn->rank; 655 PetscViewerType vtype; 656 PetscTruth iascii,isdraw; 657 PetscViewer sviewer; 658 PetscViewerFormat format; 659 #if defined(PETSC_HAVE_PLAPACK) 660 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 661 #endif 662 663 PetscFunctionBegin; 664 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 665 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 666 if (iascii) { 667 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 668 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 669 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 670 MatInfo info; 671 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 672 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n, 673 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 674 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 675 ierr = 
VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 676 #if defined(PETSC_HAVE_PLAPACK) 677 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 678 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 679 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 680 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 682 #endif 683 PetscFunctionReturn(0); 684 } else if (format == PETSC_VIEWER_ASCII_INFO) { 685 PetscFunctionReturn(0); 686 } 687 } else if (isdraw) { 688 PetscDraw draw; 689 PetscTruth isnull; 690 691 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 692 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 693 if (isnull) PetscFunctionReturn(0); 694 } 695 696 if (size == 1) { 697 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 698 } else { 699 /* assemble the entire matrix onto first processor. */ 700 Mat A; 701 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz; 702 PetscInt *cols; 703 PetscScalar *vals; 704 705 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 706 if (!rank) { 707 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 708 } else { 709 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 710 } 711 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 712 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 713 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 714 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 715 716 /* Copy the matrix ... This isn't the most efficient means, 717 but it's quick for now */ 718 A->insertmode = INSERT_VALUES; 719 row = mat->rmap.rstart; m = mdn->A->rmap.n; 720 for (i=0; i<m; i++) { 721 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 722 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 723 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 724 row++; 725 } 726 727 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 728 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 729 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 730 if (!rank) { 731 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 732 } 733 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 734 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 735 ierr = MatDestroy(A);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIDense" 742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 743 { 744 PetscErrorCode ierr; 745 PetscTruth iascii,isbinary,isdraw,issocket; 746 747 PetscFunctionBegin; 748 749 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 750 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 751 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 752 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 753 754 if (iascii || issocket || isdraw) { 755 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 756 } else if (isbinary) { 757 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 758 } else { 759 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense 
matrix",((PetscObject)viewer)->type_name); 760 } 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatGetInfo_MPIDense" 766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 767 { 768 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 769 Mat mdn = mat->A; 770 PetscErrorCode ierr; 771 PetscReal isend[5],irecv[5]; 772 773 PetscFunctionBegin; 774 info->rows_global = (double)A->rmap.N; 775 info->columns_global = (double)A->cmap.N; 776 info->rows_local = (double)A->rmap.n; 777 info->columns_local = (double)A->cmap.N; 778 info->block_size = 1.0; 779 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 780 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 781 isend[3] = info->memory; isend[4] = info->mallocs; 782 if (flag == MAT_LOCAL) { 783 info->nz_used = isend[0]; 784 info->nz_allocated = isend[1]; 785 info->nz_unneeded = isend[2]; 786 info->memory = isend[3]; 787 info->mallocs = isend[4]; 788 } else if (flag == MAT_GLOBAL_MAX) { 789 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 790 info->nz_used = irecv[0]; 791 info->nz_allocated = irecv[1]; 792 info->nz_unneeded = irecv[2]; 793 info->memory = irecv[3]; 794 info->mallocs = irecv[4]; 795 } else if (flag == MAT_GLOBAL_SUM) { 796 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 797 info->nz_used = irecv[0]; 798 info->nz_allocated = irecv[1]; 799 info->nz_unneeded = irecv[2]; 800 info->memory = irecv[3]; 801 info->mallocs = irecv[4]; 802 } 803 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 804 info->fill_ratio_needed = 0; 805 info->factor_mallocs = 0; 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSetOption_MPIDense" 811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 812 { 813 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 814 PetscErrorCode ierr; 815 816 PetscFunctionBegin; 817 switch (op) { 818 case MAT_NEW_NONZERO_LOCATIONS: 819 case MAT_NEW_NONZERO_LOCATION_ERR: 820 case MAT_NEW_NONZERO_ALLOCATION_ERR: 821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 822 break; 823 case MAT_ROW_ORIENTED: 824 a->roworiented = flg; 825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 826 break; 827 case MAT_NEW_DIAGONALS: 828 case MAT_USE_HASH_TABLE: 829 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 830 break; 831 case MAT_IGNORE_OFF_PROC_ENTRIES: 832 a->donotstash = flg; 833 break; 834 case MAT_SYMMETRIC: 835 case MAT_STRUCTURALLY_SYMMETRIC: 836 case MAT_HERMITIAN: 837 case MAT_SYMMETRY_ETERNAL: 838 case MAT_IGNORE_LOWER_TRIANGULAR: 839 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 840 break; 841 default: 842 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 843 } 844 PetscFunctionReturn(0); 845 } 846 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatDiagonalScale_MPIDense" 850 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 851 { 852 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 853 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 854 PetscScalar *l,*r,x,*v; 855 PetscErrorCode ierr; 856 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n; 857 858 PetscFunctionBegin; 859 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 860 if (ll) { 861 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 862 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 863 ierr = 
VecGetArray(ll,&l);CHKERRQ(ierr); 864 for (i=0; i<m; i++) { 865 x = l[i]; 866 v = mat->v + i; 867 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 868 } 869 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 870 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 871 } 872 if (rr) { 873 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 874 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 875 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 878 for (i=0; i<n; i++) { 879 x = r[i]; 880 v = mat->v + i*m; 881 for (j=0; j<m; j++) { (*v++) *= x;} 882 } 883 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 884 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatNorm_MPIDense" 891 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 892 { 893 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 894 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 895 PetscErrorCode ierr; 896 PetscInt i,j; 897 PetscReal sum = 0.0; 898 PetscScalar *v = mat->v; 899 900 PetscFunctionBegin; 901 if (mdn->size == 1) { 902 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 903 } else { 904 if (type == NORM_FROBENIUS) { 905 for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) { 906 #if defined(PETSC_USE_COMPLEX) 907 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 908 #else 909 sum += (*v)*(*v); v++; 910 #endif 911 } 912 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 913 *nrm = sqrt(*nrm); 914 ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr); 915 } else if (type == NORM_1) { 916 PetscReal *tmp,*tmp2; 917 ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 918 tmp2 = tmp + A->cmap.N; 919 ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 920 *nrm = 0.0; 921 v = mat->v; 922 for (j=0; j<mdn->A->cmap.n; j++) { 923 for (i=0; i<mdn->A->rmap.n; i++) { 924 tmp[j] += PetscAbsScalar(*v); v++; 925 } 926 } 927 ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 928 for (j=0; j<A->cmap.N; j++) { 929 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 930 } 931 ierr = PetscFree(tmp);CHKERRQ(ierr); 932 ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr); 933 } else if (type == NORM_INFINITY) { /* max row norm */ 934 PetscReal ntemp; 935 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 936 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 937 } else { 938 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 939 } 940 } 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatTranspose_MPIDense" 946 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 947 { 948 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 949 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 950 Mat B; 951 PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart; 952 PetscErrorCode ierr; 953 PetscInt j,i; 954 PetscScalar *v; 955 956 PetscFunctionBegin; 957 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 958 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 959 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 960 ierr = 
MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 961 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 962 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 963 } else { 964 B = *matout; 965 } 966 967 m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v; 968 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 969 for (i=0; i<m; i++) rwork[i] = rstart + i; 970 for (j=0; j<n; j++) { 971 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 972 v += m; 973 } 974 ierr = PetscFree(rwork);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 978 *matout = B; 979 } else { 980 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 981 } 982 PetscFunctionReturn(0); 983 } 984 985 #include "petscblaslapack.h" 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatScale_MPIDense" 988 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 989 { 990 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 991 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 992 PetscScalar oalpha = alpha; 993 PetscErrorCode ierr; 994 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N); 995 996 PetscFunctionBegin; 997 BLASscal_(&nz,&oalpha,a->v,&one); 998 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1006 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1007 { 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #if defined(PETSC_HAVE_PLAPACK) 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatMPIDenseCreatePlapack" 1019 PetscErrorCode MatMPIDenseCreatePlapack(Mat A) 1020 { 1021 Mat_Plapack *lu; 1022 PetscErrorCode ierr; 1023 PetscInt M=A->rmap.N; 1024 MPI_Comm comm=((PetscObject)A)->comm; 1025 PetscMPIInt size; 1026 1027 PetscFunctionBegin; 1028 if (A->spptr) PetscFunctionReturn(0); 1029 ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr); 1030 A->spptr = (void*)lu; 1031 1032 lu = (Mat_Plapack*)(A->spptr); 1033 1034 /* Set default Plapack parameters */ 1035 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1036 lu->nb = M/size; 1037 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1038 1039 /* Set runtime options */ 1040 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1041 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1042 PetscOptionsEnd(); 1043 1044 /* Create object distribution template */ 1045 lu->templ = NULL; 1046 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1047 1048 /* Set the datatype */ 1049 #if defined(PETSC_USE_COMPLEX) 1050 lu->datatype = MPI_DOUBLE_COMPLEX; 1051 #else 1052 lu->datatype = MPI_DOUBLE; 1053 #endif 1054 1055 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1056 1057 1058 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1064 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1065 { 1066 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1067 
PetscErrorCode ierr; 1068 PetscInt M=A->cmap.N,m=A->rmap.n,rstart; 1069 PetscScalar *array; 1070 PetscReal one = 1.0; 1071 1072 PetscFunctionBegin; 1073 /* Copy A into F->lu->A */ 1074 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1075 ierr = PLA_API_begin();CHKERRQ(ierr); 1076 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1077 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1078 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1079 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1080 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1081 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1082 ierr = PLA_API_end();CHKERRQ(ierr); 1083 lu->rstart = rstart; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1089 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1090 { 1091 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1092 PetscErrorCode ierr; 1093 PetscInt M=A->cmap.N,m=A->rmap.n,rstart; 1094 PetscScalar *array; 1095 PetscReal one = 1.0; 1096 1097 PetscFunctionBegin; 1098 /* Copy F into A->lu->A */ 1099 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1100 ierr = PLA_API_begin();CHKERRQ(ierr); 1101 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1102 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1103 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1104 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1105 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1106 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1107 ierr = PLA_API_end();CHKERRQ(ierr); 1108 lu->rstart = rstart; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1114 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1115 { 1116 PetscErrorCode ierr; 1117 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1118 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1119 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1120 PLA_Obj alpha = NULL,beta = NULL; 1121 1122 PetscFunctionBegin; 1123 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1124 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1125 1126 /* 1127 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1128 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1129 */ 1130 1131 /* do the multiply in PLA */ 1132 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1133 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1134 CHKMEMQ; 1135 1136 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A);//CHKERRQ(ierr); 1137 CHKMEMQ; 1138 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1139 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1140 1141 /* 1142 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1143 */ 1144 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1150 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1151 { 1152 PetscErrorCode ierr; 1153 PetscInt m=A->rmap.n,n=B->cmap.n; 1154 Mat Cmat; 1155 1156 PetscFunctionBegin; 1157 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 1158 SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported"); 1159 ierr = 
MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1160 ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr); 1161 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1162 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1164 ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr); 1165 ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr); 1166 ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr); 1167 *C = Cmat; 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1173 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1174 { 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (scall == MAT_INITIAL_MATRIX){ 1179 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1180 } 1181 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSolve_MPIDense" 1188 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1189 { 1190 MPI_Comm comm = ((PetscObject)A)->comm,dummy_comm; 1191 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1192 PetscErrorCode ierr; 1193 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1194 PetscScalar *array; 1195 PetscReal one = 1.0; 1196 PetscMPIInt size,r_rank,r_nproc,c_rank,c_nproc;; 1197 PLA_Obj v_pla = NULL; 1198 PetscScalar *loc_buf; 1199 Vec loc_x; 1200 1201 PetscFunctionBegin; 1202 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1203 1204 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1205 ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr); 1206 ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr); 1207 1208 /* Copy b into rhs_pla */ 1209 ierr = PLA_API_begin();CHKERRQ(ierr); 1210 ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr); 1211 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1212 ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr); 1213 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1214 ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr); 1215 ierr = PLA_API_end();CHKERRQ(ierr); 1216 1217 if (A->factor == FACTOR_LU){ 1218 /* Apply the permutations to the right hand sides */ 1219 ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr); 1220 1221 /* Solve L y = b, overwriting b with y */ 1222 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1223 1224 /* Solve U x = y (=b), overwriting b with x */ 1225 ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1226 } else { /* FACTOR_CHOLESKY */ 1227 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1228 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? 
PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1229 } 1230 1231 /* Copy PLAPACK x into Petsc vector x */ 1232 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1233 ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr); 1234 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1235 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1236 if (!lu->pla_solved){ 1237 1238 ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr); 1239 ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr); 1240 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1241 1242 /* Create IS and cts for VecScatterring */ 1243 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1244 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1245 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1246 idx_petsc = idx_pla + loc_m; 1247 1248 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1249 for (i=0; i<loc_m; i+=lu->nb){ 1250 j = 0; 1251 while (j < lu->nb && i+j < loc_m){ 1252 idx_petsc[i+j] = rstart + j; j++; 1253 } 1254 rstart += size*lu->nb; 1255 } 1256 1257 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1258 1259 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1260 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1261 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1262 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1263 } 1264 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 1267 /* Free data */ 1268 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1269 ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr); 1270 1271 lu->pla_solved = PETSC_TRUE; 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1277 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1278 { 1279 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1280 PetscErrorCode ierr; 1281 PetscInt info_pla; 1282 1283 PetscFunctionBegin; 1284 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1285 1286 /* Copy A into F->lu->A */ 1287 ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr); 1288 1289 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1290 info_pla = PLA_LU(lu->A,lu->pivots); 1291 if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1292 1293 (*F)->assembled = PETSC_TRUE; 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1299 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1300 { 1301 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1302 PetscErrorCode ierr; 1303 PetscInt info_pla; 1304 1305 PetscFunctionBegin; 1306 1307 /* Copy A into F->lu->A */ 1308 ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr); 1309 1310 /* Factor P A -> Chol */ 1311 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1312 if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1313 1314 (*F)->assembled = PETSC_TRUE; 1315 PetscFunctionReturn(0); 1316 } 1317 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ 
"MatFactorSymbolic_MPIDense_Private" 1321 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1322 { 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 /* Create the factorization matrix */ 1327 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1328 ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 1329 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1330 ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 /* Note the Petsc r and c permutations are ignored */ 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1337 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1338 { 1339 PetscErrorCode ierr; 1340 PetscInt M = A->rmap.N; 1341 Mat_Plapack *lu; 1342 1343 PetscFunctionBegin; 1344 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1345 lu = (Mat_Plapack*)(*F)->spptr; 1346 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1347 (*F)->factor = FACTOR_LU; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 /* Note the Petsc perm permutation is ignored */ 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1354 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1355 { 1356 PetscErrorCode ierr; 1357 PetscTruth issymmetric,set; 1358 1359 PetscFunctionBegin; 1360 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1361 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1362 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1363 (*F)->factor = FACTOR_CHOLESKY; 1364 PetscFunctionReturn(0); 1365 } 1366 #endif 1367 1368 /* -------------------------------------------------------------------*/ 1369 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1370 MatGetRow_MPIDense, 1371 MatRestoreRow_MPIDense, 1372 MatMult_MPIDense, 1373 /* 4*/ MatMultAdd_MPIDense, 1374 MatMultTranspose_MPIDense, 1375 MatMultTransposeAdd_MPIDense, 1376 #if defined(PETSC_HAVE_PLAPACK) 1377 MatSolve_MPIDense, 1378 #else 1379 0, 1380 #endif 1381 0, 1382 0, 1383 /*10*/ 0, 1384 0, 1385 0, 1386 0, 1387 MatTranspose_MPIDense, 1388 /*15*/ MatGetInfo_MPIDense, 1389 MatEqual_MPIDense, 1390 MatGetDiagonal_MPIDense, 1391 MatDiagonalScale_MPIDense, 1392 MatNorm_MPIDense, 1393 /*20*/ MatAssemblyBegin_MPIDense, 1394 MatAssemblyEnd_MPIDense, 1395 0, 1396 MatSetOption_MPIDense, 1397 MatZeroEntries_MPIDense, 1398 /*25*/ MatZeroRows_MPIDense, 1399 #if defined(PETSC_HAVE_PLAPACK) 1400 MatLUFactorSymbolic_MPIDense, 1401 MatLUFactorNumeric_MPIDense, 1402 MatCholeskyFactorSymbolic_MPIDense, 1403 MatCholeskyFactorNumeric_MPIDense, 1404 #else 1405 0, 1406 0, 1407 0, 1408 0, 1409 #endif 1410 /*30*/ MatSetUpPreallocation_MPIDense, 1411 0, 1412 0, 1413 MatGetArray_MPIDense, 1414 MatRestoreArray_MPIDense, 1415 /*35*/ MatDuplicate_MPIDense, 1416 0, 1417 0, 1418 0, 1419 0, 1420 /*40*/ 0, 1421 MatGetSubMatrices_MPIDense, 1422 0, 1423 MatGetValues_MPIDense, 1424 0, 1425 /*45*/ 0, 1426 MatScale_MPIDense, 1427 0, 1428 0, 1429 0, 1430 /*50*/ 0, 1431 0, 1432 0, 1433 0, 1434 0, 1435 /*55*/ 0, 1436 0, 1437 0, 1438 0, 1439 0, 1440 /*60*/ MatGetSubMatrix_MPIDense, 1441 MatDestroy_MPIDense, 1442 MatView_MPIDense, 1443 0, 1444 0, 1445 /*65*/ 0, 1446 0, 1447 0, 1448 0, 1449 0, 1450 /*70*/ 0, 1451 0, 1452 0, 1453 0, 1454 0, 1455 /*75*/ 0, 1456 0, 1457 0, 
1458 0, 1459 0, 1460 /*80*/ 0, 1461 0, 1462 0, 1463 0, 1464 /*84*/ MatLoad_MPIDense, 1465 0, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*90*/ 1471 #if defined(PETSC_HAVE_PLAPACK) 1472 MatMatMult_MPIDense_MPIDense, 1473 MatMatMultSymbolic_MPIDense_MPIDense, 1474 MatMatMultNumeric_MPIDense_MPIDense, 1475 #else 1476 0, 1477 0, 1478 0, 1479 #endif 1480 0, 1481 /*95*/ 0, 1482 0, 1483 0, 1484 0}; 1485 1486 EXTERN_C_BEGIN 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1489 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1490 { 1491 Mat_MPIDense *a; 1492 PetscErrorCode ierr; 1493 1494 PetscFunctionBegin; 1495 mat->preallocated = PETSC_TRUE; 1496 /* Note: For now, when data is specified above, this assumes the user correctly 1497 allocates the local dense storage space. We should add error checking. */ 1498 1499 a = (Mat_MPIDense*)mat->data; 1500 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1501 ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr); 1502 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1503 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1504 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 EXTERN_C_END 1508 1509 EXTERN_C_BEGIN 1510 #if defined(PETSC_HAVE_PLAPACK) 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK" 1513 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type) 1514 { 1515 PetscFunctionBegin; 1516 /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */ 1517 PetscFunctionReturn(0); 1518 } 1519 #endif 1520 EXTERN_C_END 1521 1522 /*MC 1523 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1524 1525 Options Database Keys: 1526 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1527 1528 Level: beginner 1529 1530 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1531 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1532 (run config/configure.py with the option --download-plapack) 1533 1534 1535 Options Database Keys: 1536 . -mat_plapack_nprows <n> - number of rows in processor partition 1537 . -mat_plapack_npcols <n> - number of columns in processor partition 1538 . -mat_plapack_nb <n> - block size of template vector 1539 . 
-mat_plapack_nb_alg <n> - algorithmic block size 1540 - -mat_plapack_ckerror <n> - error checking flag 1541 1542 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1543 M*/ 1544 1545 EXTERN_C_BEGIN 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatCreate_MPIDense" 1548 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1549 { 1550 Mat_MPIDense *a; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1555 mat->data = (void*)a; 1556 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1557 mat->factor = 0; 1558 mat->mapping = 0; 1559 1560 mat->insertmode = NOT_SET_VALUES; 1561 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1562 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1563 1564 mat->rmap.bs = mat->cmap.bs = 1; 1565 ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr); 1566 ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr); 1567 a->nvec = mat->cmap.n; 1568 1569 /* build cache for off array entries formed */ 1570 a->donotstash = PETSC_FALSE; 1571 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1572 1573 /* stuff used for matrix vector multiply */ 1574 a->lvec = 0; 1575 a->Mvctx = 0; 1576 a->roworiented = PETSC_TRUE; 1577 1578 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1579 "MatGetDiagonalBlock_MPIDense", 1580 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1582 "MatMPIDenseSetPreallocation_MPIDense", 1583 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1585 "MatMatMult_MPIAIJ_MPIDense", 1586 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1588 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1589 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1591 "MatMatMultNumeric_MPIAIJ_MPIDense", 1592 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1593 #if defined(PETSC_HAVE_PLAPACK) 1594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C", 1595 "MatSetSolverType_MPIDense_PLAPACK", 1596 MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr); 1597 #endif 1598 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1599 1600 PetscFunctionReturn(0); 1601 } 1602 EXTERN_C_END 1603 1604 /*MC 1605 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1606 1607 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1608 and MATMPIDENSE otherwise. 1609 1610 Options Database Keys: 1611 . 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
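/*
   Editorial usage sketch (not part of the library source), assuming A has already been
   created with type MATMPIDENSE and its sizes set; the names m (local row count), N
   (global column count), and storage are used here only for illustration.  With
   data=PETSC_NULL PETSc manages the local storage; otherwise the caller supplies a
   column-major array of size m*N, matching what the inner MATSEQDENSE matrix expects.

     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   or, with user-provided storage:

     PetscScalar *storage;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&storage);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,storage);CHKERRQ(ierr);
*/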
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
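/*
   Editorial calling sketch (not part of the library source); the dimensions and the
   inserted value are arbitrary examples, and PETSc decides the local distribution.

     Mat            C;
     PetscInt       row = 0,col = 0;
     PetscScalar    val = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&C);CHKERRQ(ierr);
     ierr = MatSetValues(C,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/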
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }
  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
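/*
   Editorial loading sketch (not part of the library source); the file name "matrix.dat"
   is an arbitrary example, and the calls are the standard PETSc binary-viewer routines
   that eventually dispatch to MatLoad_MPIDense() above.

     Mat            D;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&D);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/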
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
  when using static libraries.

  Input Parameter:
.  path - the dynamic library path, or PETSC_NULL

  Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif