1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 #endif 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatDenseGetLocalMatrix" 16 /*@ 17 18 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 19 matrix that represents the operator. For sequential matrices it returns itself. 20 21 Input Parameter: 22 . A - the Seq or MPI dense matrix 23 24 Output Parameter: 25 . B - the inner matrix 26 27 Level: intermediate 28 29 @*/ 30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 31 { 32 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 33 PetscErrorCode ierr; 34 PetscTruth flg; 35 36 PetscFunctionBegin; 37 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 38 if (flg) { 39 *B = mat->A; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatGetRow_MPIDense" 48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 49 { 50 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 51 PetscErrorCode ierr; 52 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 53 54 PetscFunctionBegin; 55 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 56 lrow = row - rstart; 57 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRestoreRow_MPIDense" 63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 69 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 70 PetscFunctionReturn(0); 71 } 72 73 EXTERN_C_BEGIN 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 79 PetscErrorCode ierr; 80 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 81 PetscScalar *array; 82 MPI_Comm comm; 83 84 PetscFunctionBegin; 85 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 86 87 /* The reuse aspect is not implemented efficiently */ 88 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 89 90 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 91 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatCreate(comm,B);CHKERRQ(ierr); 93 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 94 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 95 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 96 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 97 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 100 *iscopy = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIDense" 107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt 
     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only a submatrix with the same partitioning as the
     original matrix!
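     (In particular, isrow may list only rows owned by this process: the copy below addresses
     the local array as av[irow[j] - rstart] with no communication.)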
*/ 206 207 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 208 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 209 210 /* Check submatrix call */ 211 if (scall == MAT_REUSE_MATRIX) { 212 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 213 /* Really need to test rows and column sizes! */ 214 newmat = *B; 215 } else { 216 /* Create and fill new matrix */ 217 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 218 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 219 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 220 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 221 } 222 223 /* Now extract the data pointers and do the copy, column at a time */ 224 newmatd = (Mat_MPIDense*)newmat->data; 225 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 226 227 for (i=0; i<Ncols; i++) { 228 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 229 for (j=0; j<nrows; j++) { 230 *bv++ = av[irow[j] - rstart]; 231 } 232 } 233 234 /* Assemble the matrices so that the correct flags are set */ 235 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 236 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 237 238 /* Free work space */ 239 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 240 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 241 *B = newmat; 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatRestoreArray_MPIDense" 247 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 248 { 249 PetscFunctionBegin; 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 255 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 256 { 257 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 258 MPI_Comm comm = ((PetscObject)mat)->comm; 259 PetscErrorCode ierr; 260 PetscInt nstash,reallocs; 261 InsertMode addv; 262 263 PetscFunctionBegin; 264 /* make sure all processors are either in INSERTMODE or ADDMODE */ 265 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 266 if (addv == (ADD_VALUES|INSERT_VALUES)) { 267 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 268 } 269 mat->insertmode = addv; /* in case this processor had no cache */ 270 271 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 272 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 273 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 279 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 280 { 281 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 282 PetscErrorCode ierr; 283 PetscInt i,*row,*col,flg,j,rstart,ncols; 284 PetscMPIInt n; 285 PetscScalar *val; 286 InsertMode addv=mat->insertmode; 287 288 PetscFunctionBegin; 289 /* wait on receives */ 290 while (1) { 291 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 292 if (!flg) break; 293 294 for (i=0; i<n;) { 295 /* Now identify the consecutive vals belonging to the same row */ 296 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 297 if (j < n) ncols = j-i; 298 else ncols = n-i; 299 /* Now assemble all these values with a single function call */ 300 ierr = 
MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if
(nprocs[2*i+1]) { 395 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 396 } 397 } 398 ierr = PetscFree(starts);CHKERRQ(ierr); 399 400 base = owners[rank]; 401 402 /* wait on receives */ 403 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 404 source = lens + nrecvs; 405 count = nrecvs; slen = 0; 406 while (count) { 407 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 408 /* unpack receives into our local space */ 409 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 410 source[imdex] = recv_status.MPI_SOURCE; 411 lens[imdex] = n; 412 slen += n; 413 count--; 414 } 415 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 416 417 /* move the data into the send scatter */ 418 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 419 count = 0; 420 for (i=0; i<nrecvs; i++) { 421 values = rvalues + i*nmax; 422 for (j=0; j<lens[i]; j++) { 423 lrows[count++] = values[j] - base; 424 } 425 } 426 ierr = PetscFree(rvalues);CHKERRQ(ierr); 427 ierr = PetscFree(lens);CHKERRQ(ierr); 428 ierr = PetscFree(owner);CHKERRQ(ierr); 429 ierr = PetscFree(nprocs);CHKERRQ(ierr); 430 431 /* actually zap the local rows */ 432 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 433 ierr = PetscFree(lrows);CHKERRQ(ierr); 434 435 /* wait on sends */ 436 if (nsends) { 437 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 438 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 439 ierr = PetscFree(send_status);CHKERRQ(ierr); 440 } 441 ierr = PetscFree(send_waits);CHKERRQ(ierr); 442 ierr = PetscFree(svalues);CHKERRQ(ierr); 443 444 PetscFunctionReturn(0); 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "MatMult_MPIDense" 449 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 450 { 451 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 452 PetscErrorCode ierr; 453 454 PetscFunctionBegin; 455 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 456 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 457 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatMultAdd_MPIDense" 463 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 464 { 465 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 466 PetscErrorCode ierr; 467 468 PetscFunctionBegin; 469 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatMultTranspose_MPIDense" 477 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 478 { 479 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 480 PetscErrorCode ierr; 481 PetscScalar zero = 0.0; 482 483 PetscFunctionBegin; 484 ierr = VecSet(yy,zero);CHKERRQ(ierr); 485 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 486 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 487 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 493 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 494 { 495 Mat_MPIDense *a 
= (Mat_MPIDense*)A->data; 496 PetscErrorCode ierr; 497 498 PetscFunctionBegin; 499 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 500 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 501 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 502 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatGetDiagonal_MPIDense" 508 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 509 { 510 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 511 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 512 PetscErrorCode ierr; 513 PetscInt len,i,n,m = A->rmap->n,radd; 514 PetscScalar *x,zero = 0.0; 515 516 PetscFunctionBegin; 517 ierr = VecSet(v,zero);CHKERRQ(ierr); 518 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 519 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 520 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 521 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 522 radd = A->rmap->rstart*m; 523 for (i=0; i<len; i++) { 524 x[i] = aloc->v[radd + i*m + i]; 525 } 526 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatDestroy_MPIDense" 532 PetscErrorCode MatDestroy_MPIDense(Mat mat) 533 { 534 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 535 PetscErrorCode ierr; 536 #if defined(PETSC_HAVE_PLAPACK) 537 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 538 #endif 539 540 PetscFunctionBegin; 541 542 #if defined(PETSC_USE_LOG) 543 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 544 #endif 545 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 546 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 547 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 548 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 549 #if defined(PETSC_HAVE_PLAPACK) 550 if (lu) { 551 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 552 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 553 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 554 555 if (lu->is_pla) { 556 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 557 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 558 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 559 } 560 } 561 #endif 562 563 ierr = PetscFree(mdn);CHKERRQ(ierr); 564 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatView_MPIDense_Binary" 575 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 576 { 577 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 578 PetscErrorCode ierr; 579 PetscViewerFormat format; 580 int fd; 581 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 582 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 583 PetscScalar *work,*v,*vv; 584 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 585 MPI_Status status; 586 
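  /* Gathers the whole matrix onto process 0 and writes it in the PETSc binary dense format
     (only PETSC_VIEWER_NATIVE is supported): process 0 writes the header and its own rows,
     then receives each remaining process's block in turn; every block is transposed from the
     column-major storage of the local Mat_SeqDense into row-major order (work[j + i*N])
     before being written. */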
587 PetscFunctionBegin; 588 if (mdn->size == 1) { 589 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 590 } else { 591 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 592 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 593 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 594 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_NATIVE) { 597 598 if (!rank) { 599 /* store the matrix as a dense matrix */ 600 header[0] = MAT_FILE_COOKIE; 601 header[1] = mat->rmap->N; 602 header[2] = N; 603 header[3] = MATRIX_BINARY_FORMAT_DENSE; 604 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 605 606 /* get largest work array needed for transposing array */ 607 mmax = mat->rmap->n; 608 for (i=1; i<size; i++) { 609 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 610 } 611 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 612 613 /* write out local array, by rows */ 614 m = mat->rmap->n; 615 v = a->v; 616 for (j=0; j<N; j++) { 617 for (i=0; i<m; i++) { 618 work[j + i*N] = *v++; 619 } 620 } 621 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 622 /* get largest work array to receive messages from other processes, excludes process zero */ 623 mmax = 0; 624 for (i=1; i<size; i++) { 625 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 626 } 627 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 628 for(k = 1; k < size; k++) { 629 v = vv; 630 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 631 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 632 633 for(j = 0; j < N; j++) { 634 for(i = 0; i < m; i++) { 635 work[j + i*N] = *v++; 636 } 637 } 638 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(work);CHKERRQ(ierr); 641 ierr = PetscFree(vv);CHKERRQ(ierr); 642 } else { 643 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 644 } 645 } else { 646 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 647 } 648 } 649 PetscFunctionReturn(0); 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 654 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 655 { 656 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 657 PetscErrorCode ierr; 658 PetscMPIInt size = mdn->size,rank = mdn->rank; 659 const PetscViewerType vtype; 660 PetscTruth iascii,isdraw; 661 PetscViewer sviewer; 662 PetscViewerFormat format; 663 #if defined(PETSC_HAVE_PLAPACK) 664 Mat_Plapack *lu; 665 #endif 666 667 PetscFunctionBegin; 668 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 669 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 670 if (iascii) { 671 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 672 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 673 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 674 MatInfo info; 675 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 676 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 677 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 678 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 679 #if 
defined(PETSC_HAVE_PLAPACK) 680 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 682 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 683 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 684 if (mat->factor){ 685 lu=(Mat_Plapack*)(mat->spptr); 686 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 687 } 688 #else 689 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 690 #endif 691 PetscFunctionReturn(0); 692 } else if (format == PETSC_VIEWER_ASCII_INFO) { 693 PetscFunctionReturn(0); 694 } 695 } else if (isdraw) { 696 PetscDraw draw; 697 PetscTruth isnull; 698 699 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 700 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 701 if (isnull) PetscFunctionReturn(0); 702 } 703 704 if (size == 1) { 705 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 706 } else { 707 /* assemble the entire matrix onto first processor. */ 708 Mat A; 709 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 710 PetscInt *cols; 711 PetscScalar *vals; 712 713 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 714 if (!rank) { 715 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 716 } else { 717 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 718 } 719 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 720 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 721 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 722 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 723 724 /* Copy the matrix ... 
This isn't the most efficient means, 725 but it's quick for now */ 726 A->insertmode = INSERT_VALUES; 727 row = mat->rmap->rstart; m = mdn->A->rmap->n; 728 for (i=0; i<m; i++) { 729 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 730 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 731 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 732 row++; 733 } 734 735 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 736 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 737 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 738 if (!rank) { 739 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 740 } 741 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 742 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 743 ierr = MatDestroy(A);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatView_MPIDense" 750 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 751 { 752 PetscErrorCode ierr; 753 PetscTruth iascii,isbinary,isdraw,issocket; 754 755 PetscFunctionBegin; 756 757 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 758 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 759 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 760 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 761 762 if (iascii || issocket || isdraw) { 763 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 764 } else if (isbinary) { 765 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 766 } else { 767 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 768 } 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatGetInfo_MPIDense" 774 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 775 { 776 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 777 Mat mdn = mat->A; 778 PetscErrorCode ierr; 779 PetscReal isend[5],irecv[5]; 780 781 PetscFunctionBegin; 782 info->block_size = 1.0; 783 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 784 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 785 isend[3] = info->memory; isend[4] = info->mallocs; 786 if (flag == MAT_LOCAL) { 787 info->nz_used = isend[0]; 788 info->nz_allocated = isend[1]; 789 info->nz_unneeded = isend[2]; 790 info->memory = isend[3]; 791 info->mallocs = isend[4]; 792 } else if (flag == MAT_GLOBAL_MAX) { 793 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 794 info->nz_used = irecv[0]; 795 info->nz_allocated = irecv[1]; 796 info->nz_unneeded = irecv[2]; 797 info->memory = irecv[3]; 798 info->mallocs = irecv[4]; 799 } else if (flag == MAT_GLOBAL_SUM) { 800 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 801 info->nz_used = irecv[0]; 802 info->nz_allocated = irecv[1]; 803 info->nz_unneeded = irecv[2]; 804 info->memory = irecv[3]; 805 info->mallocs = irecv[4]; 806 } 807 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 808 info->fill_ratio_needed = 0; 809 info->factor_mallocs = 0; 810 PetscFunctionReturn(0); 811 } 812 813 #undef __FUNCT__ 814 #define __FUNCT__ "MatSetOption_MPIDense" 815 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 816 { 817 
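  /* Options that only affect the local storage are forwarded to the inner MATSEQDENSE matrix;
     MAT_ROW_ORIENTED is additionally recorded here for use by MatSetValues_MPIDense(), and
     MAT_IGNORE_OFF_PROC_ENTRIES simply disables the off-process stash. */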
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 818 PetscErrorCode ierr; 819 820 PetscFunctionBegin; 821 switch (op) { 822 case MAT_NEW_NONZERO_LOCATIONS: 823 case MAT_NEW_NONZERO_LOCATION_ERR: 824 case MAT_NEW_NONZERO_ALLOCATION_ERR: 825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 826 break; 827 case MAT_ROW_ORIENTED: 828 a->roworiented = flg; 829 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 830 break; 831 case MAT_NEW_DIAGONALS: 832 case MAT_USE_HASH_TABLE: 833 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 834 break; 835 case MAT_IGNORE_OFF_PROC_ENTRIES: 836 a->donotstash = flg; 837 break; 838 case MAT_SYMMETRIC: 839 case MAT_STRUCTURALLY_SYMMETRIC: 840 case MAT_HERMITIAN: 841 case MAT_SYMMETRY_ETERNAL: 842 case MAT_IGNORE_LOWER_TRIANGULAR: 843 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 844 break; 845 default: 846 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 852 #undef __FUNCT__ 853 #define __FUNCT__ "MatDiagonalScale_MPIDense" 854 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 855 { 856 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 857 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 858 PetscScalar *l,*r,x,*v; 859 PetscErrorCode ierr; 860 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 861 862 PetscFunctionBegin; 863 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 864 if (ll) { 865 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 866 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 867 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 868 for (i=0; i<m; i++) { 869 x = l[i]; 870 v = mat->v + i; 871 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 872 } 873 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 874 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 875 } 876 if (rr) { 877 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 878 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 879 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 882 for (i=0; i<n; i++) { 883 x = r[i]; 884 v = mat->v + i*m; 885 for (j=0; j<m; j++) { (*v++) *= x;} 886 } 887 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 888 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 889 } 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatNorm_MPIDense" 895 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 896 { 897 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 898 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 899 PetscErrorCode ierr; 900 PetscInt i,j; 901 PetscReal sum = 0.0; 902 PetscScalar *v = mat->v; 903 904 PetscFunctionBegin; 905 if (mdn->size == 1) { 906 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 907 } else { 908 if (type == NORM_FROBENIUS) { 909 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 910 #if defined(PETSC_USE_COMPLEX) 911 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 912 #else 913 sum += (*v)*(*v); v++; 914 #endif 915 } 916 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 917 *nrm = sqrt(*nrm); 918 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 919 } else if (type == NORM_1) { 920 PetscReal *tmp,*tmp2; 921 ierr = 
PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 922 tmp2 = tmp + A->cmap->N; 923 ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 924 *nrm = 0.0; 925 v = mat->v; 926 for (j=0; j<mdn->A->cmap->n; j++) { 927 for (i=0; i<mdn->A->rmap->n; i++) { 928 tmp[j] += PetscAbsScalar(*v); v++; 929 } 930 } 931 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 932 for (j=0; j<A->cmap->N; j++) { 933 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 934 } 935 ierr = PetscFree(tmp);CHKERRQ(ierr); 936 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 937 } else if (type == NORM_INFINITY) { /* max row norm */ 938 PetscReal ntemp; 939 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 940 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 941 } else { 942 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 943 } 944 } 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatTranspose_MPIDense" 950 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 951 { 952 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 953 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 954 Mat B; 955 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 956 PetscErrorCode ierr; 957 PetscInt j,i; 958 PetscScalar *v; 959 960 PetscFunctionBegin; 961 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 962 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 963 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 964 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 965 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 966 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 967 } else { 968 B = *matout; 969 } 970 971 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 972 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 973 for (i=0; i<m; i++) rwork[i] = rstart + i; 974 for (j=0; j<n; j++) { 975 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 976 v += m; 977 } 978 ierr = PetscFree(rwork);CHKERRQ(ierr); 979 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 982 *matout = B; 983 } else { 984 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 985 } 986 PetscFunctionReturn(0); 987 } 988 989 #include "petscblaslapack.h" 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatScale_MPIDense" 992 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 993 { 994 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 995 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 996 PetscScalar oalpha = alpha; 997 PetscErrorCode ierr; 998 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N); 999 1000 PetscFunctionBegin; 1001 BLASscal_(&nz,&oalpha,a->v,&one); 1002 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1010 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1011 { 1012 PetscErrorCode ierr; 1013 1014 PetscFunctionBegin; 1015 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #if defined(PETSC_HAVE_PLAPACK) 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 
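/* The two helpers below move the local dense array between PETSc's row-block layout and the
   distributed PLAPACK object lu->A through the PLA_API axpy interface; they are used by the
   PLAPACK-based MatMatMult routines that follow. */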
1023 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1024 { 1025 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1026 PetscErrorCode ierr; 1027 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1028 PetscScalar *array; 1029 PetscReal one = 1.0; 1030 1031 PetscFunctionBegin; 1032 /* Copy A into F->lu->A */ 1033 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1034 ierr = PLA_API_begin();CHKERRQ(ierr); 1035 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1036 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1037 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1038 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1039 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1040 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1041 ierr = PLA_API_end();CHKERRQ(ierr); 1042 lu->rstart = rstart; 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1048 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1049 { 1050 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1051 PetscErrorCode ierr; 1052 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1053 PetscScalar *array; 1054 PetscReal one = 1.0; 1055 1056 PetscFunctionBegin; 1057 /* Copy F into A->lu->A */ 1058 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1059 ierr = PLA_API_begin();CHKERRQ(ierr); 1060 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1062 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1063 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1064 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1065 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1066 ierr = PLA_API_end();CHKERRQ(ierr); 1067 lu->rstart = rstart; 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1073 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1074 { 1075 PetscErrorCode ierr; 1076 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1077 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1078 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1079 PLA_Obj alpha = NULL,beta = NULL; 1080 1081 PetscFunctionBegin; 1082 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1083 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1084 1085 /* 1086 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1087 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1088 */ 1089 1090 /* do the multiply in PLA */ 1091 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1092 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1093 CHKMEMQ; 1094 1095 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1096 CHKMEMQ; 1097 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1098 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1099 1100 /* 1101 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1102 */ 1103 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1109 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1110 { 1111 PetscErrorCode ierr; 1112 PetscInt m=A->rmap->n,n=B->cmap->n; 1113 Mat Cmat; 1114 1115 PetscFunctionBegin; 1116 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1117 
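  /* The unconditional SETERRQ below disables this routine for now, so the creation of Cmat
     that follows it is currently unreachable; it is kept for when the PLAPACK problem is
     resolved. */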
  SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into v_pla */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factor == MAT_FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows (v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
  } else { /* MAT_FACTOR_CHOLESKY */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
              PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into PETSc vector x */
  PLA_Obj_local_length(v_pla, &loc_m);
  PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
  PLA_Obj_local_stride(v_pla, &loc_stride);
  /*
  PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
  */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

    /* Create the ISs and ctx for the VecScatter */
    PLA_Obj_local_length(v_pla, &loc_m);
    PLA_Obj_local_stride(v_pla, &loc_stride);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free(&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart    = rstart;
  lu->mstruct   = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE;  /* required by -ksp_view */
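  /* The PLAPACK factor stays in lu->A with the pivots in lu->pivots; MatSolve_MPIDense()
     reuses them for every subsequent right-hand side. */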
PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1280 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1281 { 1282 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1283 PetscErrorCode ierr; 1284 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1285 PetscInt info_pla=0; 1286 PetscScalar *array,one = 1.0; 1287 1288 PetscFunctionBegin; 1289 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1290 PLA_Obj_free(&lu->A); 1291 } 1292 /* Create PLAPACK matrix object */ 1293 lu->A = NULL; 1294 lu->pivots = NULL; 1295 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1296 1297 /* Copy A into lu->A */ 1298 PLA_API_begin(); 1299 PLA_Obj_API_open(lu->A); 1300 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1301 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1302 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1303 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1304 PLA_Obj_API_close(lu->A); 1305 PLA_API_end(); 1306 1307 /* Factor P A -> Chol */ 1308 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1309 if (info_pla != 0) 1310 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1311 1312 lu->rstart = rstart; 1313 lu->mstruct = SAME_NONZERO_PATTERN; 1314 F->ops->solve = MatSolve_MPIDense; 1315 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /* Note the Petsc perm permutation is ignored */ 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1322 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1323 { 1324 PetscErrorCode ierr; 1325 PetscTruth issymmetric,set; 1326 1327 PetscFunctionBegin; 1328 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1329 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1330 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1331 PetscFunctionReturn(0); 1332 } 1333 1334 /* Note the Petsc r and c permutations are ignored */ 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1337 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1338 { 1339 PetscErrorCode ierr; 1340 PetscInt M = A->rmap->N; 1341 Mat_Plapack *lu; 1342 1343 PetscFunctionBegin; 1344 lu = (Mat_Plapack*)F->spptr; 1345 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1346 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 EXTERN_C_BEGIN 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1353 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1354 { 1355 PetscFunctionBegin; 1356 *type = MAT_SOLVER_PLAPACK; 1357 PetscFunctionReturn(0); 1358 } 1359 EXTERN_C_END 1360 1361 EXTERN_C_BEGIN 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1364 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1365 { 1366 PetscErrorCode ierr; 1367 Mat_Plapack *lu; 1368 PetscMPIInt size; 1369 PetscInt M=A->rmap->N; 1370 1371 PetscFunctionBegin; 1372 /* Create the factorization matrix */ 1373 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1374 ierr = 
MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1375 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1376 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1377 (*F)->spptr = (void*)lu; 1378 1379 /* Set default Plapack parameters */ 1380 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1381 lu->nb = M/size; 1382 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1383 1384 /* Set runtime options */ 1385 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1386 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1387 PetscOptionsEnd(); 1388 1389 /* Create object distribution template */ 1390 lu->templ = NULL; 1391 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1392 1393 /* Set the datatype */ 1394 #if defined(PETSC_USE_COMPLEX) 1395 lu->datatype = MPI_DOUBLE_COMPLEX; 1396 #else 1397 lu->datatype = MPI_DOUBLE; 1398 #endif 1399 1400 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1401 1402 1403 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1404 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1405 1406 if (ftype == MAT_FACTOR_LU) { 1407 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1408 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1409 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1410 } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1411 (*F)->factor = ftype; 1412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 EXTERN_C_END 1416 #endif 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatAXPY_MPIDense" 1420 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1421 { 1422 PetscErrorCode ierr; 1423 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1424 1425 PetscFunctionBegin; 1426 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 /* -------------------------------------------------------------------*/ 1431 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1432 MatGetRow_MPIDense, 1433 MatRestoreRow_MPIDense, 1434 MatMult_MPIDense, 1435 /* 4*/ MatMultAdd_MPIDense, 1436 MatMultTranspose_MPIDense, 1437 MatMultTransposeAdd_MPIDense, 1438 0, 1439 0, 1440 0, 1441 /*10*/ 0, 1442 0, 1443 0, 1444 0, 1445 MatTranspose_MPIDense, 1446 /*15*/ MatGetInfo_MPIDense, 1447 MatEqual_MPIDense, 1448 MatGetDiagonal_MPIDense, 1449 MatDiagonalScale_MPIDense, 1450 MatNorm_MPIDense, 1451 /*20*/ MatAssemblyBegin_MPIDense, 1452 MatAssemblyEnd_MPIDense, 1453 MatSetOption_MPIDense, 1454 MatZeroEntries_MPIDense, 1455 /*24*/ MatZeroRows_MPIDense, 1456 0, 1457 0, 1458 0, 1459 0, 1460 /*29*/ MatSetUpPreallocation_MPIDense, 1461 0, 1462 0, 1463 MatGetArray_MPIDense, 1464 MatRestoreArray_MPIDense, 1465 /*34*/ MatDuplicate_MPIDense, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*39*/ MatAXPY_MPIDense, 1471 MatGetSubMatrices_MPIDense, 1472 0, 1473 MatGetValues_MPIDense, 1474 0, 1475 /*44*/ 0, 1476 MatScale_MPIDense, 1477 0, 1478 0, 1479 0, 1480 /*49*/ 0, 1481 0, 1482 0, 1483 0, 1484 0, 1485 /*54*/ 0, 1486 0, 1487 0, 1488 0, 1489 0, 1490 /*59*/ MatGetSubMatrix_MPIDense, 1491 
MatDestroy_MPIDense, 1492 MatView_MPIDense, 1493 0, 1494 0, 1495 /*64*/ 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 /*69*/ 0, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*74*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*79*/ 0, 1511 0, 1512 0, 1513 0, 1514 /*83*/ MatLoad_MPIDense, 1515 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*89*/ 1521 #if defined(PETSC_HAVE_PLAPACK) 1522 MatMatMult_MPIDense_MPIDense, 1523 MatMatMultSymbolic_MPIDense_MPIDense, 1524 MatMatMultNumeric_MPIDense_MPIDense, 1525 #else 1526 0, 1527 0, 1528 0, 1529 #endif 1530 0, 1531 /*94*/ 0, 1532 0, 1533 0, 1534 0}; 1535 1536 EXTERN_C_BEGIN 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1539 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1540 { 1541 Mat_MPIDense *a; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 mat->preallocated = PETSC_TRUE; 1546 /* Note: For now, when data is specified above, this assumes the user correctly 1547 allocates the local dense storage space. We should add error checking. */ 1548 1549 a = (Mat_MPIDense*)mat->data; 1550 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1551 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1552 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1553 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1554 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 EXTERN_C_END 1558 1559 /*MC 1560 MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1561 1562 run config/configure.py with the option --download-plapack 1563 1564 1565 Options Database Keys: 1566 . -mat_plapack_nprows <n> - number of rows in processor partition 1567 . -mat_plapack_npcols <n> - number of columns in processor partition 1568 . -mat_plapack_nb <n> - block size of template vector 1569 . 
-mat_plapack_nb_alg <n> - algorithmic block size 1570 - -mat_plapack_ckerror <n> - error checking flag 1571 1572 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1573 1574 M*/ 1575 1576 EXTERN_C_BEGIN 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatCreate_MPIDense" 1579 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1580 { 1581 Mat_MPIDense *a; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1586 mat->data = (void*)a; 1587 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1588 mat->mapping = 0; 1589 1590 mat->insertmode = NOT_SET_VALUES; 1591 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1592 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1593 1594 ierr = PetscMapSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1595 ierr = PetscMapSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1596 ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr); 1597 ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr); 1598 a->nvec = mat->cmap->n; 1599 1600 /* build cache for off array entries formed */ 1601 a->donotstash = PETSC_FALSE; 1602 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1603 1604 /* stuff used for matrix vector multiply */ 1605 a->lvec = 0; 1606 a->Mvctx = 0; 1607 a->roworiented = PETSC_TRUE; 1608 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1610 "MatGetDiagonalBlock_MPIDense", 1611 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1613 "MatMPIDenseSetPreallocation_MPIDense", 1614 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1616 "MatMatMult_MPIAIJ_MPIDense", 1617 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1619 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1620 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1622 "MatMatMultNumeric_MPIAIJ_MPIDense", 1623 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1624 #if defined(PETSC_HAVE_PLAPACK) 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C", 1626 "MatGetFactor_mpidense_plapack", 1627 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1628 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1629 #endif 1630 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1631 1632 PetscFunctionReturn(0); 1633 } 1634 EXTERN_C_END 1635 1636 /*MC 1637 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1638 1639 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1640 and MATMPIDENSE otherwise. 1641 1642 Options Database Keys: 1643 . 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
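
/*
   Editor's note (not part of the original source): a minimal usage sketch of the routine
   above, assuming the local sizes m,n and global sizes M,N are defined by the calling code.
   Passing data=PETSC_NULL lets PETSc allocate the dense storage itself.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/
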
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
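
/*
   Editor's note (not part of the original source): a minimal usage sketch of
   MatCreateMPIDense(), assuming M and N are defined by the calling code and that the
   entries are set elsewhere with MatSetValues() before assembly.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
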
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap->rstart = A->rmap->rstart;
  mat->rmap->rend   = A->rmap->rend;
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* handle the case where the matrix is stored on disk as a dense matrix */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
  PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, these
  subcommunicators cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror){
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif