#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/


#include "../src/mat/impls/dense/mpi/mpidense.h"   /*I  "petscmat.h"  I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

    MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
    matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
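/*
   Example usage (sketch, not part of the library): how application code typically reaches
   the per-process sequential block through MatDenseGetLocalMatrix(); the sizes m,n,M,N and
   the variable names are illustrative only.

     Mat A,Aloc;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,m,n,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ... set values in A and assemble it ...
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ... Aloc is the local MATSEQDENSE block and can be used with sequential dense routines ...
*/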
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
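/*
   Example usage (sketch, not part of the library): MatGetArray() on a MATMPIDENSE matrix
   returns the local dense block, which is stored by columns with m local rows and N global
   columns, so the entry for local row i and global column j sits at array[i + j*m].

     PetscScalar *array;
     PetscInt    m,n,M,N;

     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
     ierr = MatGetArray(A,&array);CHKERRQ(ierr);
     ... read or modify array[i + j*m] for 0 <= i < m and 0 <= j < N ...
     ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/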
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix!
  */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
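/*
   Example usage (sketch, not part of the library): values destined for rows owned by other
   processes are held in the stash by MatSetValues_MPIDense() and are only communicated and
   applied by the assembly routines above, so every sequence of calls must finish with the
   usual assembly pair. grow, gcol and value are illustrative names.

     ierr = MatSetValues(A,1,&grow,1,&gcol,&value,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/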
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free(&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v = vv;
          m = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode        ierr;
  PetscMPIInt           size = mdn->size,rank = mdn->rank;
  const PetscViewerType vtype;
  PetscTruth            iascii,isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack           *lu;
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
      if (mat->factor){
        lu=(Mat_Plapack*)(mat->spptr);
        ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      }
#else
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap->rstart; m = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
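/*
   Example usage (sketch, not part of the library): MatView_MPIDense_Binary() above only
   accepts a binary viewer in the native format, so saving a parallel dense matrix to disk
   looks like the following; "dense.dat" is an illustrative file name.

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/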
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap->N;
      ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy F into A->lu->A */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into rhs_pla */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factor == MAT_FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows (v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
  } else { /* MAT_FACTOR_CHOLESKY */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
              PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into Petsc vector x */
  PLA_Obj_local_length(v_pla, &loc_m);
  PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
  PLA_Obj_local_stride(v_pla, &loc_stride);
  /*
  PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
  */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

    /* Create the ISs and scatter context for the VecScatter */
    PLA_Obj_local_length(v_pla, &loc_m);
    PLA_Obj_local_stride(v_pla, &loc_stride);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free(&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart    = rstart;
  lu->mstruct   = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE; /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->rstart    = rstart;
  lu->mstruct   = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE; /* required by -ksp_view */
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  lu = (Mat_Plapack*)F->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MAT_SOLVER_PLAPACK;
  PetscFunctionReturn(0);
}
EXTERN_C_END
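/*
   Example usage (sketch, not part of the library): the factorization and solve routines above
   are normally reached through the generic factor interface rather than called directly. A
   typical LU factor/solve with this solver package, assuming assembled A, b and x, could look
   like the following; factinfo, isr and isc are illustrative names.

     Mat           F;
     MatFactorInfo factinfo;
     IS            isr,isc;

     ierr = MatFactorInfoInitialize(&factinfo);CHKERRQ(ierr);
     ierr = MatGetOrdering(A,MATORDERING_NATURAL,&isr,&isc);CHKERRQ(ierr);  the orderings are ignored here
     ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,isr,isc,&factinfo);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&factinfo);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = ISDestroy(isr);CHKERRQ(ierr);
     ierr = ISDestroy(isc);CHKERRQ(ierr);
     ierr = MatDestroy(F);CHKERRQ(ierr);
*/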
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_plapack"
PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->mstruct    = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factor = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*24*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*34*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIDense,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*44*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       0,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
/*83*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*89*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*94*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

   run config/configure.py with the option --download-plapack


  Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage

M*/
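/*
   Example usage (sketch, not part of the library): the options above are read at runtime, so
   a typical invocation on a 2 x 2 processor mesh with an algorithmic block size of 32 could
   look like the following ("./ex1" is an illustrative executable name), combined with whatever
   option selects MAT_SOLVER_PLAPACK as the factorization package (for example through
   PCFactorSetMatSolverPackage() when using an LU preconditioner).

     mpiexec -n 4 ./ex1 -mat_type mpidense -mat_plapack_nprows 2 -mat_plapack_npcols 2 \
             -mat_plapack_nb_alg 32
*/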
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr          = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data     = (void*)a;
  ierr          = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->mapping  = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscMapSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                     MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                     MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                     MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
                                     "MatGetFactor_mpidense_plapack",
                                     MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/
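/*
   Example usage (sketch, not part of the library): letting the type be chosen at runtime with
   -mat_type dense, so that MATDENSE resolves to MATSEQDENSE or MATMPIDENSE depending on the
   communicator size; M and N are assumed to be chosen by the caller.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/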
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
.  -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
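/*
   Illustrative sketch (not part of the library source): passing user-provided storage through
   MatMPIDenseSetPreallocation(). As set up by MatMPIDenseSetPreallocation_MPIDense() above, each
   process stores its m local rows by N global columns in column-major (Fortran) order with leading
   dimension m, so the user array must hold m*N scalars. The names m, N, i, j, value and userarray
   are placeholders.

     PetscScalar *userarray;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&userarray);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,userarray);CHKERRQ(ierr);
     userarray[i + j*m] = value;    entry (rstart+i, j) of the global matrix

   The array must remain valid for the life of the matrix and remains owned by the caller.
*/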
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
          for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap->rstart = A->rmap->rstart;
  mat->rmap->rend   = A->rmap->rend;
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"
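/*
   The loaders below read the PETSc binary matrix format. MatLoad_MPIDense_DenseInFile() handles
   files whose values are stored as one dense block: process 0 reads each process's block of rows
   in turn, keeps the first block for itself, and ships the remaining blocks with MPI_Send().
   Because the file stores the values row by row while the local dense storage is column-major,
   each block is copied into the array element by element rather than read in place.
*/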
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
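/*
   Illustrative sketch (not part of the library source): loading a matrix from a PETSc binary file
   into a parallel dense matrix; MatLoad() dispatches to MatLoad_MPIDense() below. The file name
   "matrix.dat" is a placeholder.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/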
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition,
  if PLAPACK is initialized on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created there),
  those subcommunicators cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
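/*
   Illustrative sketch (not part of the library source): selecting the PLAPACK process grid
   programmatically before the first MPI dense matrix is created, since PetscPLAPACKInitializePackage()
   above reads these options only once. This assumes a 4-process run and a 2x2 grid; M and N are
   placeholder global sizes.

     ierr = PetscOptionsSetValue("-mat_plapack_nprows","2");CHKERRQ(ierr);
     ierr = PetscOptionsSetValue("-mat_plapack_npcols","2");CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
*/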