1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 #endif 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatDenseGetLocalMatrix" 16 /*@ 17 18 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 19 matrix that represents the operator. For sequential matrices it returns itself. 20 21 Input Parameter: 22 . A - the Seq or MPI dense matrix 23 24 Output Parameter: 25 . B - the inner matrix 26 27 Level: intermediate 28 29 @*/ 30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 31 { 32 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 33 PetscErrorCode ierr; 34 PetscTruth flg; 35 36 PetscFunctionBegin; 37 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 38 if (flg) { 39 *B = mat->A; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatGetRow_MPIDense" 48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 49 { 50 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 51 PetscErrorCode ierr; 52 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 53 54 PetscFunctionBegin; 55 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 56 lrow = row - rstart; 57 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRestoreRow_MPIDense" 63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 69 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 70 PetscFunctionReturn(0); 71 } 72 73 
EXTERN_C_BEGIN 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 79 PetscErrorCode ierr; 80 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 81 PetscScalar *array; 82 MPI_Comm comm; 83 84 PetscFunctionBegin; 85 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 86 87 /* The reuse aspect is not implemented efficiently */ 88 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 89 90 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 91 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatCreate(comm,B);CHKERRQ(ierr); 93 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 94 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 95 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 96 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 97 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 100 *iscopy = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIDense" 107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 112 PetscTruth roworiented = A->roworiented; 113 114 PetscFunctionBegin; 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else 
{ 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 if (roworiented) { 132 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 133 } else { 134 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 135 } 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetValues_MPIDense" 144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 145 { 146 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 149 150 PetscFunctionBegin; 151 for (i=0; i<m; i++) { 152 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 153 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 154 if (idxm[i] >= rstart && idxm[i] < rend) { 155 row = idxm[i] - rstart; 156 for (j=0; j<n; j++) { 157 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 158 if (idxn[j] >= mat->cmap->N) { 159 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 } 161 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 162 } 163 } else { 164 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 165 } 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetArray_MPIDense" 172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 
#define __FUNCT__ "MatGetSubMatrix_MPIDense" 184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 185 { 186 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 187 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 188 PetscErrorCode ierr; 189 PetscInt i,j,rstart,rend,nrows,ncols,nlrows,nlcols; 190 const PetscInt *irow,*icol; 191 PetscScalar *av,*bv,*v = lmat->v; 192 Mat newmat; 193 194 PetscFunctionBegin; 195 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 196 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 198 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 199 200 /* No parallel redistribution currently supported! Should really check each index set 201 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 202 original matrix! */ 203 204 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 205 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 206 207 /* Check submatrix call */ 208 if (scall == MAT_REUSE_MATRIX) { 209 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 210 /* Really need to test rows and column sizes! 
*/ 211 newmat = *B; 212 } else { 213 /* Create and fill new matrix */ 214 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 215 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 216 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 217 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 218 } 219 220 /* Now extract the data pointers and do the copy, column at a time */ 221 newmatd = (Mat_MPIDense*)newmat->data; 222 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 223 224 for (i=0; i<ncols; i++) { 225 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 226 for (j=0; j<nrows; j++) { 227 *bv++ = av[irow[j] - rstart]; 228 } 229 } 230 231 /* Assemble the matrices so that the correct flags are set */ 232 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 234 235 /* Free work space */ 236 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 237 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 238 *B = newmat; 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatRestoreArray_MPIDense" 244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 245 { 246 PetscFunctionBegin; 247 PetscFunctionReturn(0); 248 } 249 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 253 { 254 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 255 MPI_Comm comm = ((PetscObject)mat)->comm; 256 PetscErrorCode ierr; 257 PetscInt nstash,reallocs; 258 InsertMode addv; 259 260 PetscFunctionBegin; 261 /* make sure all processors are either in INSERTMODE or ADDMODE */ 262 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 263 if (addv == (ADD_VALUES|INSERT_VALUES)) { 264 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 265 } 266 mat->insertmode = addv; /* in 
case this processor had no cache */ 267 268 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 269 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 270 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 277 { 278 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 279 PetscErrorCode ierr; 280 PetscInt i,*row,*col,flg,j,rstart,ncols; 281 PetscMPIInt n; 282 PetscScalar *val; 283 InsertMode addv=mat->insertmode; 284 285 PetscFunctionBegin; 286 /* wait on receives */ 287 while (1) { 288 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 289 if (!flg) break; 290 291 for (i=0; i<n;) { 292 /* Now identify the consecutive vals belonging to the same row */ 293 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 294 if (j < n) ncols = j-i; 295 else ncols = n-i; 296 /* Now assemble all these values with a single function call */ 297 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 298 i = j; 299 } 300 } 301 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 302 303 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 304 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 305 306 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 307 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatZeroEntries_MPIDense" 314 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 315 { 316 PetscErrorCode ierr; 317 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 318 319 PetscFunctionBegin; 320 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 /* the code does not do the diagonal entries correctly unless the 
325 matrix is square and the column and row owerships are identical. 326 This is a BUG. The only way to fix it seems to be to access 327 mdn->A and mdn->B directly and not through the MatZeroRows() 328 routine. 329 */ 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatZeroRows_MPIDense" 332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 333 { 334 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 335 PetscErrorCode ierr; 336 PetscInt i,*owners = A->rmap->range; 337 PetscInt *nprocs,j,idx,nsends; 338 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 339 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 340 PetscInt *lens,*lrows,*values; 341 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 342 MPI_Comm comm = ((PetscObject)A)->comm; 343 MPI_Request *send_waits,*recv_waits; 344 MPI_Status recv_status,*send_status; 345 PetscTruth found; 346 347 PetscFunctionBegin; 348 /* first count number of contributors to each processor */ 349 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 350 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 352 for (i=0; i<N; i++) { 353 idx = rows[i]; 354 found = PETSC_FALSE; 355 for (j=0; j<size; j++) { 356 if (idx >= owners[j] && idx < owners[j+1]) { 357 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 358 } 359 } 360 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 361 } 362 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 363 364 /* inform other processors of number of messages and max length*/ 365 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 366 367 /* post receives: */ 368 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 369 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 370 for (i=0; i<nrecvs; i++) { 371 ierr = 
MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 372 } 373 374 /* do sends: 375 1) starts[i] gives the starting index in svalues for stuff going to 376 the ith processor 377 */ 378 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 379 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 380 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 381 starts[0] = 0; 382 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 383 for (i=0; i<N; i++) { 384 svalues[starts[owner[i]]++] = rows[i]; 385 } 386 387 starts[0] = 0; 388 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 389 count = 0; 390 for (i=0; i<size; i++) { 391 if (nprocs[2*i+1]) { 392 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 393 } 394 } 395 ierr = PetscFree(starts);CHKERRQ(ierr); 396 397 base = owners[rank]; 398 399 /* wait on receives */ 400 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 401 source = lens + nrecvs; 402 count = nrecvs; slen = 0; 403 while (count) { 404 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 405 /* unpack receives into our local space */ 406 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 407 source[imdex] = recv_status.MPI_SOURCE; 408 lens[imdex] = n; 409 slen += n; 410 count--; 411 } 412 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 413 414 /* move the data into the send scatter */ 415 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 416 count = 0; 417 for (i=0; i<nrecvs; i++) { 418 values = rvalues + i*nmax; 419 for (j=0; j<lens[i]; j++) { 420 lrows[count++] = values[j] - base; 421 } 422 } 423 ierr = PetscFree(rvalues);CHKERRQ(ierr); 424 ierr = PetscFree(lens);CHKERRQ(ierr); 425 ierr = PetscFree(owner);CHKERRQ(ierr); 426 ierr = PetscFree(nprocs);CHKERRQ(ierr); 427 428 /* actually zap the local rows */ 429 ierr = 
MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 430 ierr = PetscFree(lrows);CHKERRQ(ierr); 431 432 /* wait on sends */ 433 if (nsends) { 434 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 435 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 436 ierr = PetscFree(send_status);CHKERRQ(ierr); 437 } 438 ierr = PetscFree(send_waits);CHKERRQ(ierr); 439 ierr = PetscFree(svalues);CHKERRQ(ierr); 440 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatMult_MPIDense" 446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 447 { 448 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 449 PetscErrorCode ierr; 450 451 PetscFunctionBegin; 452 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatMultAdd_MPIDense" 460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 461 { 462 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 463 PetscErrorCode ierr; 464 465 PetscFunctionBegin; 466 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMultTranspose_MPIDense" 474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 475 { 476 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 477 PetscErrorCode ierr; 478 PetscScalar zero = 0.0; 479 480 PetscFunctionBegin; 481 ierr = VecSet(yy,zero);CHKERRQ(ierr); 482 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 483 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 491 { 492 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 493 PetscErrorCode ierr; 494 495 PetscFunctionBegin; 496 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 497 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 498 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 499 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "MatGetDiagonal_MPIDense" 505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 506 { 507 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 508 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 509 PetscErrorCode ierr; 510 PetscInt len,i,n,m = A->rmap->n,radd; 511 PetscScalar *x,zero = 0.0; 512 513 PetscFunctionBegin; 514 ierr = VecSet(v,zero);CHKERRQ(ierr); 515 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 516 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 517 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 518 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 519 radd = A->rmap->rstart*m; 520 for (i=0; i<len; i++) { 521 x[i] = aloc->v[radd + i*m + i]; 522 } 523 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatDestroy_MPIDense" 529 PetscErrorCode MatDestroy_MPIDense(Mat mat) 530 { 531 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 532 PetscErrorCode ierr; 533 #if defined(PETSC_HAVE_PLAPACK) 534 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 535 #endif 536 537 PetscFunctionBegin; 538 539 #if defined(PETSC_USE_LOG) 540 PetscLogObjectState((PetscObject)mat,"Rows=%D, 
Cols=%D",mat->rmap->N,mat->cmap->N); 541 #endif 542 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 543 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 544 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 545 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 546 #if defined(PETSC_HAVE_PLAPACK) 547 if (lu) { 548 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 549 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 550 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 551 552 if (lu->is_pla) { 553 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 554 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 555 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 556 } 557 } 558 #endif 559 560 ierr = PetscFree(mdn);CHKERRQ(ierr); 561 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatView_MPIDense_Binary" 572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 575 PetscErrorCode ierr; 576 PetscViewerFormat format; 577 int fd; 578 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 579 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 580 PetscScalar *work,*v,*vv; 581 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 582 MPI_Status status; 583 584 PetscFunctionBegin; 585 if (mdn->size == 1) { 
586 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 587 } else { 588 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 589 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 590 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 591 592 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 593 if (format == PETSC_VIEWER_NATIVE) { 594 595 if (!rank) { 596 /* store the matrix as a dense matrix */ 597 header[0] = MAT_FILE_COOKIE; 598 header[1] = mat->rmap->N; 599 header[2] = N; 600 header[3] = MATRIX_BINARY_FORMAT_DENSE; 601 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 602 603 /* get largest work array needed for transposing array */ 604 mmax = mat->rmap->n; 605 for (i=1; i<size; i++) { 606 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 607 } 608 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 609 610 /* write out local array, by rows */ 611 m = mat->rmap->n; 612 v = a->v; 613 for (j=0; j<N; j++) { 614 for (i=0; i<m; i++) { 615 work[j + i*N] = *v++; 616 } 617 } 618 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 619 /* get largest work array to receive messages from other processes, excludes process zero */ 620 mmax = 0; 621 for (i=1; i<size; i++) { 622 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 623 } 624 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 625 for(k = 1; k < size; k++) { 626 v = vv; 627 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 628 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 629 630 for(j = 0; j < N; j++) { 631 for(i = 0; i < m; i++) { 632 work[j + i*N] = *v++; 633 } 634 } 635 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 636 } 637 ierr = PetscFree(work);CHKERRQ(ierr); 638 ierr = PetscFree(vv);CHKERRQ(ierr); 639 } else { 640 ierr = 
MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 641 } 642 } else { 643 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 644 } 645 } 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 652 { 653 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 654 PetscErrorCode ierr; 655 PetscMPIInt size = mdn->size,rank = mdn->rank; 656 const PetscViewerType vtype; 657 PetscTruth iascii,isdraw; 658 PetscViewer sviewer; 659 PetscViewerFormat format; 660 #if defined(PETSC_HAVE_PLAPACK) 661 Mat_Plapack *lu; 662 #endif 663 664 PetscFunctionBegin; 665 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 666 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 667 if (iascii) { 668 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 669 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 670 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 671 MatInfo info; 672 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 673 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 674 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 675 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 676 #if defined(PETSC_HAVE_PLAPACK) 677 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 678 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 679 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 680 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 681 if 
(mat->factor){ 682 lu=(Mat_Plapack*)(mat->spptr); 683 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 684 } 685 #else 686 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 687 #endif 688 PetscFunctionReturn(0); 689 } else if (format == PETSC_VIEWER_ASCII_INFO) { 690 PetscFunctionReturn(0); 691 } 692 } else if (isdraw) { 693 PetscDraw draw; 694 PetscTruth isnull; 695 696 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 697 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 698 if (isnull) PetscFunctionReturn(0); 699 } 700 701 if (size == 1) { 702 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 703 } else { 704 /* assemble the entire matrix onto first processor. */ 705 Mat A; 706 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 707 PetscInt *cols; 708 PetscScalar *vals; 709 710 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 713 } else { 714 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 715 } 716 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 717 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 718 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 719 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 720 721 /* Copy the matrix ... 
This isn't the most efficient means, 722 but it's quick for now */ 723 A->insertmode = INSERT_VALUES; 724 row = mat->rmap->rstart; m = mdn->A->rmap->n; 725 for (i=0; i<m; i++) { 726 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 727 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 728 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 729 row++; 730 } 731 732 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 733 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 734 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 735 if (!rank) { 736 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 737 } 738 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 739 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 740 ierr = MatDestroy(A);CHKERRQ(ierr); 741 } 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatView_MPIDense" 747 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 748 { 749 PetscErrorCode ierr; 750 PetscTruth iascii,isbinary,isdraw,issocket; 751 752 PetscFunctionBegin; 753 754 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 755 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 756 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 757 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 758 759 if (iascii || issocket || isdraw) { 760 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 761 } else if (isbinary) { 762 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 763 } else { 764 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 765 } 766 PetscFunctionReturn(0); 767 } 768 769 #undef __FUNCT__ 770 #define __FUNCT__ "MatGetInfo_MPIDense" 771 PetscErrorCode 
MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 772 { 773 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 774 Mat mdn = mat->A; 775 PetscErrorCode ierr; 776 PetscReal isend[5],irecv[5]; 777 778 PetscFunctionBegin; 779 info->block_size = 1.0; 780 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 781 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 782 isend[3] = info->memory; isend[4] = info->mallocs; 783 if (flag == MAT_LOCAL) { 784 info->nz_used = isend[0]; 785 info->nz_allocated = isend[1]; 786 info->nz_unneeded = isend[2]; 787 info->memory = isend[3]; 788 info->mallocs = isend[4]; 789 } else if (flag == MAT_GLOBAL_MAX) { 790 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 791 info->nz_used = irecv[0]; 792 info->nz_allocated = irecv[1]; 793 info->nz_unneeded = irecv[2]; 794 info->memory = irecv[3]; 795 info->mallocs = irecv[4]; 796 } else if (flag == MAT_GLOBAL_SUM) { 797 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 798 info->nz_used = irecv[0]; 799 info->nz_allocated = irecv[1]; 800 info->nz_unneeded = irecv[2]; 801 info->memory = irecv[3]; 802 info->mallocs = irecv[4]; 803 } 804 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 805 info->fill_ratio_needed = 0; 806 info->factor_mallocs = 0; 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSetOption_MPIDense" 812 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 813 { 814 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 switch (op) { 819 case MAT_NEW_NONZERO_LOCATIONS: 820 case MAT_NEW_NONZERO_LOCATION_ERR: 821 case MAT_NEW_NONZERO_ALLOCATION_ERR: 822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 823 break; 824 case MAT_ROW_ORIENTED: 825 a->roworiented = flg; 826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 827 break; 828 case MAT_NEW_DIAGONALS: 829 case 
MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries are dropped instead of stashed for communication */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
/*
   Scales A <- diag(ll) * A * diag(rr).  The left vector ll is applied directly to the
   locally owned rows; the right vector rr is first scattered into mdn->lvec (one copy
   of the needed entries per process) and then applied column-by-column to the local
   column-major array mat->v.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;                      /* row i of the column-major local array */
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    /* gather the (global-length) right scaling entries needed by this process */
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;                    /* column i of the local array */
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
/*
   Computes the Frobenius, 1- or infinity-norm of a parallel dense matrix by
   combining per-process contributions with MPI_Allreduce; the 2-norm is not
   supported.  Single-process case delegates directly to the sequential MatNorm().
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares of all local entries, then global sum and sqrt */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums (each process owns all rows of its columns' local part),
         globally summed, then the maximum over columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap->N;
      ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
/*
   Forms the transpose by sending each local column (a row of the transpose) to the
   owning process via MatSetValues()/assembly.  In-place transposition is supported
   only for square matrices; otherwise a new matrix B is created (or *matout reused).
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;   /* global row indices of the local rows */
  for (j=0; j<n; j++) {
    /* local column j becomes (part of) global row j of the transpose */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's and destroy the shell of B */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
/* Scales every entry of the local (m x N) dense array by alpha using BLAS scal */
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);

PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
/* Default preallocation hook: PETSc-managed storage (no user-provided array) */
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
/*
   Copies the PETSc-distributed dense matrix A into the PLAPACK object F->spptr->A
   by zeroing the PLAPACK object and axpy-ing (alpha = 1) the local array into the
   global object through the PLAPACK API interface.
*/
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
/*
   Inverse of MatMPIDenseCopyToPlapack(): zeroes A and axpy-s (alpha = 1) the
   global PLAPACK object F->spptr->A back into A's local array.
*/
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy F into A->lu->A */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
/*
   C = A*B via PLA_Gemm: A and B are copied into their PLAPACK objects, the
   product is formed in C's PLAPACK object, then copied back into C.
   NOTE(review): requires A, B and C to carry valid Mat_Plapack spptr objects —
   normally set up by the symbolic phase, which is currently disabled (see below).
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt
m=A->rmap->n,n=B->cmap->n; 1110 Mat Cmat; 1111 1112 PetscFunctionBegin; 1113 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1114 SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported"); 1115 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1116 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1117 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1118 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1119 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1120 1121 *C = Cmat; 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1127 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1128 { 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 if (scall == MAT_INITIAL_MATRIX){ 1133 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1134 } 1135 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatSolve_MPIDense" 1141 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1142 { 1143 MPI_Comm comm = ((PetscObject)A)->comm; 1144 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1145 PetscErrorCode ierr; 1146 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1147 PetscScalar *array; 1148 PetscReal one = 1.0; 1149 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1150 PLA_Obj v_pla = NULL; 1151 PetscScalar *loc_buf; 1152 Vec loc_x; 1153 1154 PetscFunctionBegin; 1155 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1156 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1157 1158 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1159 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1160 PLA_Obj_set_to_zero(v_pla); 1161 1162 /* 
Copy b into rhs_pla */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factor == MAT_FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows (v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
  } else { /* MAT_FACTOR_CHOLESKY: solve L L^T (or L L^H) x = b with two triangular solves */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
              PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into Petsc vector x */
  PLA_Obj_local_length(v_pla, &loc_m);
  PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
  PLA_Obj_local_stride(v_pla, &loc_stride);
  /*
  PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
  */
  /* wrap PLAPACK's local buffer as a sequential PETSc vector (no copy) */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

    /* Create IS and cts for VecScatterring */
    PLA_Obj_local_length(v_pla, &loc_m);
    PLA_Obj_local_stride(v_pla, &loc_stride);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    /* map each local block of lu->nb entries to its block-cyclic global position;
       NOTE(review): assumes the 2d template ordering (r_rank*c_nproc+c_rank) — confirm
       against the PLAPACK template documentation */
    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
/*
   Numeric LU factorization via PLA_LU: (re)creates the PLAPACK matrix and pivot
   objects, copies A in through the PLAPACK API, factors in place, and installs
   MatSolve_MPIDense on F.
*/
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    /* refactorization: release the previous factor and pivots first */
    PLA_Obj_free(&lu->A);
    PLA_Obj_free (&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;
  F->ops->solve      = MatSolve_MPIDense;
  F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
/*
   Numeric Cholesky factorization via PLA_Chol (lower triangular); mirrors
   MatLUFactorNumeric_MPIDense but needs no pivot object.
*/
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0)
    SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;
  F->ops->solve      = MatSolve_MPIDense;
  F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
/* Symbolic stage: only verifies A was marked MAT_SYMMETRIC, then installs the numeric routine */
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
/* Symbolic stage: creates the pivot multivector and installs the numeric routine */
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  lu   = (Mat_Plapack*)F->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MAT_SOLVER_PLAPACK;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_plapack"
/*
   Creates the factor matrix F for LU or Cholesky, allocates the Mat_Plapack
   context in F->spptr, chooses the block size (settable via -mat_plapack_nb),
   builds the PLAPACK distribution template, and installs the symbolic routines.
*/
PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr =
MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */
  lu->mstruct    = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factor = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_petsc"
/* Built-in factor request: forwards to PLAPACK when available, errors otherwise */
PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
{
#if defined(PETSC_HAVE_PLAPACK)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PLAPACK)
  ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
#else
  SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
/* Y = alpha*X + Y, delegated to the sequential local matrices (identical layouts) */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MATMPIDENSE type; indices are the MatOperation slots */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIDense,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
/*
   Type-specific preallocation: creates the inner sequential dense matrix a->A of
   size (local rows) x (GLOBAL columns) — each process stores all columns of its
   rows — optionally wrapping a user-supplied array.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

  run config/configure.py with the option --download-plapack


  Options Database Keys:
. -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

   .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   Type constructor for MATMPIDENSE: installs the operations table, sets up the
   row/column maps and the value stash, and registers the composed functions
   (preallocation, diagonal block, AIJ*dense products, factorization hooks).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscMapSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                     MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                     MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                     MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_petsc_C",
                                     "MatGetFactor_mpidense_petsc",
                                     MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
                                     "MatGetFactor_mpidense_plapack",
                                     MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
.
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
/* Constructor for the size-dispatching "dense" type: seq on 1 process, mpi otherwise */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
.  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the type registered one */
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a sparse parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
1753 1754 Level: intermediate 1755 1756 .keywords: matrix,dense, parallel 1757 1758 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1759 @*/ 1760 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1761 { 1762 PetscErrorCode ierr; 1763 PetscMPIInt size; 1764 1765 PetscFunctionBegin; 1766 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1767 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1768 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1769 if (size > 1) { 1770 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1771 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1772 } else { 1773 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1774 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatDuplicate_MPIDense" 1781 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1782 { 1783 Mat mat; 1784 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 *newmat = 0; 1789 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1790 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1791 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1792 a = (Mat_MPIDense*)mat->data; 1793 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1794 mat->factor = A->factor; 1795 mat->assembled = PETSC_TRUE; 1796 mat->preallocated = PETSC_TRUE; 1797 1798 mat->rmap->rstart = A->rmap->rstart; 1799 mat->rmap->rend = A->rmap->rend; 1800 a->size = oldmat->size; 1801 a->rank = oldmat->rank; 1802 mat->insertmode = NOT_SET_VALUES; 1803 a->nvec = oldmat->nvec; 1804 a->donotstash = oldmat->donotstash; 1805 1806 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1807 ierr = 
PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  /* duplicate the local dense block, copying values or not per cpvalues */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/* Load a matrix stored on disk in dense (column-count-free) binary format.
   Rank 0 reads the file and ships each process its block of rows; fd is
   therefore only dereferenced on rank 0. */
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows: lower-numbered ranks absorb the remainder */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  /* prefix-sum the gathered local sizes into ownership offsets;
     rowners[0] = 0 so rowners[1] (rank 0's count) is already correct */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  /* direct access to the local column-major storage of the new matrix */
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row: the file is row-major but the dense
       storage is column-major, so we cannot read directly into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
array[i + j*m] = *vals_ptr++; 1860 } 1861 } 1862 1863 /* read in other processors and ship out */ 1864 for (i=1; i<size; i++) { 1865 nz = (rowners[i+1] - rowners[i])*N; 1866 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1867 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1868 } 1869 } else { 1870 /* receive numeric values */ 1871 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1872 1873 /* receive message of values*/ 1874 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1875 1876 /* insert into matrix-by row (this is why cannot directly read into array */ 1877 vals_ptr = vals; 1878 for (i=0; i<m; i++) { 1879 for (j=0; j<N; j++) { 1880 array[i + j*m] = *vals_ptr++; 1881 } 1882 } 1883 } 1884 ierr = PetscFree(rowners);CHKERRQ(ierr); 1885 ierr = PetscFree(vals);CHKERRQ(ierr); 1886 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1887 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatLoad_MPIDense" 1893 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1894 { 1895 Mat A; 1896 PetscScalar *vals,*svals; 1897 MPI_Comm comm = ((PetscObject)viewer)->comm; 1898 MPI_Status status; 1899 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1900 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1901 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1902 PetscInt i,nz,j,rstart,rend; 1903 int fd; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1908 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1909 if (!rank) { 1910 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1911 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1912 if (header[0] != MAT_FILE_COOKIE) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  /* broadcast M, N, nz so all ranks agree on the global sizes */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix.
     NOTE(review): fd is set only on rank 0 above; non-root ranks pass an
     uninitialized value here.  The dense loader only uses fd on rank 0, but
     confirm this is intended.
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows: lower ranks absorb the remainder */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix-sum into ownership offsets (rowners[1] is already correct) */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share one
     allocation, offlens occupying the second half */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* non-root ranks: send-side arguments of MPI_Scatterv are ignored */
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr =
PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it (reused for every rank's send) */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for the incoming message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity-check that the received length matches our row lengths */
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix (ourlens is temporarily reduced to diagonal-block
     lengths for preallocation purposes, then restored below) */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  /* restore ourlens to full row lengths for the value-insertion phase */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    /* the root reuses one buffer sized for the largest per-rank chunk */
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix one global row at a time, walking parallel
       cursors through the column-index and value buffers */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values; nz still holds our total local entry count */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix, same cursor walk as on the root */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode
MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2076 { 2077 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2078 Mat a,b; 2079 PetscTruth flg; 2080 PetscErrorCode ierr; 2081 2082 PetscFunctionBegin; 2083 a = matA->A; 2084 b = matB->A; 2085 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2086 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #if defined(PETSC_HAVE_PLAPACK) 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2094 /*@C 2095 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2096 Level: developer 2097 2098 .keywords: Petsc, destroy, package, PLAPACK 2099 .seealso: PetscFinalize() 2100 @*/ 2101 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2102 { 2103 PetscErrorCode ierr; 2104 2105 PetscFunctionBegin; 2106 ierr = PLA_Finalize();CHKERRQ(ierr); 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2112 /*@C 2113 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2114 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2115 2116 Input Parameter: 2117 . comm - the communicator the matrix lives on 2118 2119 Level: developer 2120 2121 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2122 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2123 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2124 cannot overlap. 

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* initialize at most once: PLAPACK keeps global state shared by all matrices */
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    /* default 2D processor mesh is 1 x size (a single process row) */
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
    /* default error-checking level: full checking in debug builds, none otherwise */
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror){
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
    }

    /* 0 lets PLAPACK choose its own algorithmic block size */
    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    /* NOTE(review): return value unchecked here, unlike PetscOptionsBegin()
       above -- confirm whether this is intentional for this PETSc version */
    PetscOptionsEnd();

    /* fold the 1D communicator into the 2D mesh PLAPACK requires, then init */
    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif