1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 #endif 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatDenseGetLocalMatrix" 16 /*@ 17 18 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 19 matrix that represents the operator. For sequential matrices it returns itself. 20 21 Input Parameter: 22 . A - the Seq or MPI dense matrix 23 24 Output Parameter: 25 . B - the inner matrix 26 27 Level: intermediate 28 29 @*/ 30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 31 { 32 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 33 PetscErrorCode ierr; 34 PetscTruth flg; 35 36 PetscFunctionBegin; 37 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 38 if (flg) { 39 *B = mat->A; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatGetRow_MPIDense" 48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 49 { 50 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 51 PetscErrorCode ierr; 52 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 53 54 PetscFunctionBegin; 55 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 56 lrow = row - rstart; 57 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRestoreRow_MPIDense" 63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 69 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 70 PetscFunctionReturn(0); 71 } 72 73 EXTERN_C_BEGIN 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 79 PetscErrorCode ierr; 80 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 81 PetscScalar *array; 82 MPI_Comm comm; 83 84 PetscFunctionBegin; 85 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 86 87 /* The reuse aspect is not implemented efficiently */ 88 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 89 90 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 91 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatCreate(comm,B);CHKERRQ(ierr); 93 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 94 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 95 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 96 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 97 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 100 *iscopy = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIDense" 107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 112 PetscTruth roworiented = A->roworiented; 113 114 PetscFunctionBegin; 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 if (roworiented) { 132 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 133 } else { 134 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 135 } 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetValues_MPIDense" 144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 145 { 146 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 149 150 PetscFunctionBegin; 151 for (i=0; i<m; i++) { 152 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 153 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 154 if (idxm[i] >= rstart && idxm[i] < rend) { 155 row = idxm[i] - rstart; 156 for (j=0; j<n; j++) { 157 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 158 if (idxn[j] >= mat->cmap->N) { 159 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 } 161 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 162 } 163 } else { 164 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 165 } 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetArray_MPIDense" 172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 185 { 186 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 187 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 188 PetscErrorCode ierr; 189 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 190 PetscScalar *av,*bv,*v = lmat->v; 191 Mat newmat; 192 193 PetscFunctionBegin; 194 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 195 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 196 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 198 199 /* No parallel redistribution currently supported! Should really check each index set 200 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 201 original matrix! */ 202 203 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 204 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 205 206 /* Check submatrix call */ 207 if (scall == MAT_REUSE_MATRIX) { 208 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 209 /* Really need to test rows and column sizes! */ 210 newmat = *B; 211 } else { 212 /* Create and fill new matrix */ 213 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 214 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 215 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 216 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 217 } 218 219 /* Now extract the data pointers and do the copy, column at a time */ 220 newmatd = (Mat_MPIDense*)newmat->data; 221 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 222 223 for (i=0; i<ncols; i++) { 224 av = v + nlrows*icol[i]; 225 for (j=0; j<nrows; j++) { 226 *bv++ = av[irow[j] - rstart]; 227 } 228 } 229 230 /* Assemble the matrices so that the correct flags are set */ 231 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 234 /* Free work space */ 235 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 236 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 237 *B = newmat; 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatRestoreArray_MPIDense" 243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 244 { 245 PetscFunctionBegin; 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 252 { 253 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 254 MPI_Comm comm = ((PetscObject)mat)->comm; 255 PetscErrorCode ierr; 256 PetscInt nstash,reallocs; 257 InsertMode addv; 258 259 PetscFunctionBegin; 260 /* make sure all processors are either in INSERTMODE or ADDMODE */ 261 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 262 if (addv == (ADD_VALUES|INSERT_VALUES)) { 263 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 264 } 265 mat->insertmode = addv; /* in case this processor had no cache */ 266 267 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 268 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 269 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 276 { 277 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,*row,*col,flg,j,rstart,ncols; 280 PetscMPIInt n; 281 PetscScalar *val; 282 InsertMode addv=mat->insertmode; 283 284 PetscFunctionBegin; 285 /* wait on receives */ 286 while (1) { 287 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 288 if (!flg) break; 289 290 for (i=0; i<n;) { 291 /* Now identify the consecutive vals belonging to the same row */ 292 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 293 if (j < n) ncols = j-i; 294 else ncols = n-i; 295 /* Now assemble all these values with a single function call */ 296 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 297 i = j; 298 } 299 } 300 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 301 302 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 303 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 304 305 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 306 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 307 } 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "MatZeroEntries_MPIDense" 313 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 314 { 315 PetscErrorCode ierr; 316 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 317 318 PetscFunctionBegin; 319 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 /* the code does not do the diagonal entries correctly unless the 324 matrix is square and the column and row owerships are identical. 325 This is a BUG. The only way to fix it seems to be to access 326 mdn->A and mdn->B directly and not through the MatZeroRows() 327 routine. 328 */ 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatZeroRows_MPIDense" 331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 332 { 333 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 334 PetscErrorCode ierr; 335 PetscInt i,*owners = A->rmap->range; 336 PetscInt *nprocs,j,idx,nsends; 337 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 338 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 339 PetscInt *lens,*lrows,*values; 340 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 341 MPI_Comm comm = ((PetscObject)A)->comm; 342 MPI_Request *send_waits,*recv_waits; 343 MPI_Status recv_status,*send_status; 344 PetscTruth found; 345 346 PetscFunctionBegin; 347 /* first count number of contributors to each processor */ 348 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 349 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 351 for (i=0; i<N; i++) { 352 idx = rows[i]; 353 found = PETSC_FALSE; 354 for (j=0; j<size; j++) { 355 if (idx >= owners[j] && idx < owners[j+1]) { 356 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 357 } 358 } 359 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 360 } 361 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 362 363 /* inform other processors of number of messages and max length*/ 364 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 365 366 /* post receives: */ 367 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 368 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 369 for (i=0; i<nrecvs; i++) { 370 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 371 } 372 373 /* do sends: 374 1) starts[i] gives the starting index in svalues for stuff going to 375 the ith processor 376 */ 377 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 378 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 379 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 380 starts[0] = 0; 381 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 382 for (i=0; i<N; i++) { 383 svalues[starts[owner[i]]++] = rows[i]; 384 } 385 386 starts[0] = 0; 387 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 388 count = 0; 389 for (i=0; i<size; i++) { 390 if (nprocs[2*i+1]) { 391 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscFree(starts);CHKERRQ(ierr); 395 396 base = owners[rank]; 397 398 /* wait on receives */ 399 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 400 source = lens + nrecvs; 401 count = nrecvs; slen = 0; 402 while (count) { 403 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 404 /* unpack receives into our local space */ 405 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 406 source[imdex] = recv_status.MPI_SOURCE; 407 lens[imdex] = n; 408 slen += n; 409 count--; 410 } 411 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 412 413 /* move the data into the send scatter */ 414 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 415 count = 0; 416 for (i=0; i<nrecvs; i++) { 417 values = rvalues + i*nmax; 418 for (j=0; j<lens[i]; j++) { 419 lrows[count++] = values[j] - base; 420 } 421 } 422 ierr = PetscFree(rvalues);CHKERRQ(ierr); 423 ierr = PetscFree(lens);CHKERRQ(ierr); 424 ierr = PetscFree(owner);CHKERRQ(ierr); 425 ierr = PetscFree(nprocs);CHKERRQ(ierr); 426 427 /* actually zap the local rows */ 428 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 429 ierr = PetscFree(lrows);CHKERRQ(ierr); 430 431 /* wait on sends */ 432 if (nsends) { 433 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 434 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 435 ierr = PetscFree(send_status);CHKERRQ(ierr); 436 } 437 ierr = PetscFree(send_waits);CHKERRQ(ierr); 438 ierr = PetscFree(svalues);CHKERRQ(ierr); 439 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatMult_MPIDense" 445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 446 { 447 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 452 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatMultAdd_MPIDense" 459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 462 PetscErrorCode ierr; 463 464 PetscFunctionBegin; 465 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatMultTranspose_MPIDense" 473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 474 { 475 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 476 PetscErrorCode ierr; 477 PetscScalar zero = 0.0; 478 479 PetscFunctionBegin; 480 ierr = VecSet(yy,zero);CHKERRQ(ierr); 481 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 482 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 483 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 490 { 491 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 496 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 497 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 498 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatGetDiagonal_MPIDense" 504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 507 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 508 PetscErrorCode ierr; 509 PetscInt len,i,n,m = A->rmap->n,radd; 510 PetscScalar *x,zero = 0.0; 511 512 PetscFunctionBegin; 513 ierr = VecSet(v,zero);CHKERRQ(ierr); 514 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 515 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 516 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 517 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 518 radd = A->rmap->rstart*m; 519 for (i=0; i<len; i++) { 520 x[i] = aloc->v[radd + i*m + i]; 521 } 522 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatDestroy_MPIDense" 528 PetscErrorCode MatDestroy_MPIDense(Mat mat) 529 { 530 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 531 PetscErrorCode ierr; 532 #if defined(PETSC_HAVE_PLAPACK) 533 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 534 #endif 535 536 PetscFunctionBegin; 537 538 #if defined(PETSC_USE_LOG) 539 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 540 #endif 541 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 542 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 543 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 544 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 545 #if defined(PETSC_HAVE_PLAPACK) 546 if (lu) { 547 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 548 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 549 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 550 551 if (lu->is_pla) { 552 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 553 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 554 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 555 } 556 } 557 #endif 558 559 ierr = PetscFree(mdn);CHKERRQ(ierr); 560 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatView_MPIDense_Binary" 571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 572 { 573 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 574 PetscErrorCode ierr; 575 PetscViewerFormat format; 576 int fd; 577 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 578 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 579 PetscScalar *work,*v,*vv; 580 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 581 MPI_Status status; 582 583 PetscFunctionBegin; 584 if (mdn->size == 1) { 585 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 586 } else { 587 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 588 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 589 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 590 591 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 592 if (format == PETSC_VIEWER_BINARY_NATIVE) { 593 594 if (!rank) { 595 /* store the matrix as a dense matrix */ 596 header[0] = MAT_FILE_COOKIE; 597 header[1] = mat->rmap->N; 598 header[2] = N; 599 header[3] = MATRIX_BINARY_FORMAT_DENSE; 600 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 601 602 /* get largest work array needed for transposing array */ 603 mmax = mat->rmap->n; 604 for (i=1; i<size; i++) { 605 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 606 } 607 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 608 609 /* write out local array, by rows */ 610 m = mat->rmap->n; 611 v = a->v; 612 for (j=0; j<N; j++) { 613 for (i=0; i<m; i++) { 614 work[j + i*N] = *v++; 615 } 616 } 617 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 618 /* get largest work array to receive messages from other processes, excludes process zero */ 619 mmax = 0; 620 for (i=1; i<size; i++) { 621 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 622 } 623 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 624 for(k = 1; k < size; k++) { 625 v = vv; 626 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 627 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 628 629 for(j = 0; j < N; j++) { 630 for(i = 0; i < m; i++) { 631 work[j + i*N] = *v++; 632 } 633 } 634 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 635 } 636 ierr = PetscFree(work);CHKERRQ(ierr); 637 ierr = PetscFree(vv);CHKERRQ(ierr); 638 } else { 639 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 640 } 641 } else { 642 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE"); 643 } 644 } 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 650 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 651 { 652 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 653 PetscErrorCode ierr; 654 PetscMPIInt size = mdn->size,rank = mdn->rank; 655 const PetscViewerType vtype; 656 PetscTruth iascii,isdraw; 657 PetscViewer sviewer; 658 PetscViewerFormat format; 659 #if defined(PETSC_HAVE_PLAPACK) 660 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 661 #endif 662 663 PetscFunctionBegin; 664 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 665 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 666 if (iascii) { 667 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 668 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 669 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 670 MatInfo info; 671 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 672 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 673 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 674 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 675 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 676 #if defined(PETSC_HAVE_PLAPACK) 677 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 678 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",lu->Plapack_nprows, lu->Plapack_npcols);CHKERRQ(ierr); 679 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 680 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",lu->Plapack_ierror);CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",lu->Plapack_nb_alg);CHKERRQ(ierr); 682 #endif 683 PetscFunctionReturn(0); 684 } else if (format == PETSC_VIEWER_ASCII_INFO) { 685 PetscFunctionReturn(0); 686 } 687 } else if (isdraw) { 688 PetscDraw draw; 689 PetscTruth isnull; 690 691 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 692 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 693 if (isnull) PetscFunctionReturn(0); 694 } 695 696 if (size == 1) { 697 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 698 } else { 699 /* assemble the entire matrix onto first processor. */ 700 Mat A; 701 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 702 PetscInt *cols; 703 PetscScalar *vals; 704 705 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 706 if (!rank) { 707 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 708 } else { 709 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 710 } 711 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 712 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 713 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 714 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 715 716 /* Copy the matrix ... This isn't the most efficient means, 717 but it's quick for now */ 718 A->insertmode = INSERT_VALUES; 719 row = mat->rmap->rstart; m = mdn->A->rmap->n; 720 for (i=0; i<m; i++) { 721 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 722 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 723 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 724 row++; 725 } 726 727 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 728 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 729 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 730 if (!rank) { 731 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 732 } 733 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 734 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 735 ierr = MatDestroy(A);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIDense" 742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 743 { 744 PetscErrorCode ierr; 745 PetscTruth iascii,isbinary,isdraw,issocket; 746 747 PetscFunctionBegin; 748 749 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 750 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 751 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 752 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 753 754 if (iascii || issocket || isdraw) { 755 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 756 } else if (isbinary) { 757 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 758 } else { 759 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 760 } 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatGetInfo_MPIDense" 766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 767 { 768 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 769 Mat mdn = mat->A; 770 PetscErrorCode ierr; 771 PetscReal isend[5],irecv[5]; 772 773 PetscFunctionBegin; 774 info->rows_global = (double)A->rmap->N; 775 info->columns_global = (double)A->cmap->N; 776 info->rows_local = (double)A->rmap->n; 777 info->columns_local = (double)A->cmap->N; 778 info->block_size = 1.0; 779 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 780 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 781 isend[3] = info->memory; isend[4] = info->mallocs; 782 if (flag == MAT_LOCAL) { 783 info->nz_used = isend[0]; 784 info->nz_allocated = isend[1]; 785 info->nz_unneeded = isend[2]; 786 info->memory = isend[3]; 787 info->mallocs = isend[4]; 788 } else if (flag == MAT_GLOBAL_MAX) { 789 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 790 info->nz_used = irecv[0]; 791 info->nz_allocated = irecv[1]; 792 info->nz_unneeded = irecv[2]; 793 info->memory = irecv[3]; 794 info->mallocs = irecv[4]; 795 } else if (flag == MAT_GLOBAL_SUM) { 796 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 797 info->nz_used = irecv[0]; 798 info->nz_allocated = irecv[1]; 799 info->nz_unneeded = irecv[2]; 800 info->memory = irecv[3]; 801 info->mallocs = irecv[4]; 802 } 803 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 804 info->fill_ratio_needed = 0; 805 info->factor_mallocs = 0; 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSetOption_MPIDense" 811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 812 { 813 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 814 PetscErrorCode ierr; 815 816 PetscFunctionBegin; 817 switch (op) { 818 case MAT_NEW_NONZERO_LOCATIONS: 819 case MAT_NEW_NONZERO_LOCATION_ERR: 820 case MAT_NEW_NONZERO_ALLOCATION_ERR: 821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 822 break; 823 case MAT_ROW_ORIENTED: 824 a->roworiented = flg; 825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 826 break; 827 case MAT_NEW_DIAGONALS: 828 case MAT_USE_HASH_TABLE: 829 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 830 break; 831 case MAT_IGNORE_OFF_PROC_ENTRIES: 832 a->donotstash = flg; 833 break; 834 case MAT_SYMMETRIC: 835 case MAT_STRUCTURALLY_SYMMETRIC: 836 case MAT_HERMITIAN: 837 case MAT_SYMMETRY_ETERNAL: 838 case MAT_IGNORE_LOWER_TRIANGULAR: 839 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 840 break; 841 default: 842 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 843 } 844 PetscFunctionReturn(0); 845 } 846 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatDiagonalScale_MPIDense" 850 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 851 { 852 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 853 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 854 PetscScalar *l,*r,x,*v; 855 PetscErrorCode ierr; 856 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 857 858 PetscFunctionBegin; 859 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 860 if (ll) { 861 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 862 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 863 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 864 for (i=0; i<m; i++) { 865 x = l[i]; 866 v = mat->v + i; 867 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 868 } 869 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 870 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 871 } 872 if (rr) { 873 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 874 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 875 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 878 for (i=0; i<n; i++) { 879 x = r[i]; 880 v = mat->v + i*m; 881 for (j=0; j<m; j++) { (*v++) *= x;} 882 } 883 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 884 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatNorm_MPIDense" 891 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 892 { 893 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 894 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 895 PetscErrorCode ierr; 896 PetscInt i,j; 897 PetscReal sum = 0.0; 898 PetscScalar *v = mat->v; 899 900 PetscFunctionBegin; 901 if (mdn->size == 1) { 902 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 903 } else { 904 if (type == NORM_FROBENIUS) { 905 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 906 #if defined(PETSC_USE_COMPLEX) 907 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 908 #else 909 sum += (*v)*(*v); v++; 910 #endif 911 } 912 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 913 *nrm = sqrt(*nrm); 914 ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 915 } else if (type == NORM_1) { 916 PetscReal *tmp,*tmp2; 917 ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 918 tmp2 = tmp + A->cmap->N; 919 ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 920 *nrm = 0.0; 921 v = mat->v; 922 for (j=0; j<mdn->A->cmap->n; j++) { 923 for (i=0; i<mdn->A->rmap->n; i++) { 924 tmp[j] += PetscAbsScalar(*v); v++; 925 } 926 } 927 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 928 for (j=0; j<A->cmap->N; j++) { 929 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 930 } 931 ierr = PetscFree(tmp);CHKERRQ(ierr); 932 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 933 } else if (type == NORM_INFINITY) { /* max row norm */ 934 PetscReal ntemp; 935 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 936 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 937 } else { 938 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 939 } 940 } 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatTranspose_MPIDense" 946 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 947 { 948 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 949 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 950 Mat B; 951 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 952 PetscErrorCode ierr; 953 PetscInt j,i; 954 PetscScalar *v; 955 956 PetscFunctionBegin; 957 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 958 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 959 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 960 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 961 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 962 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 963 } else { 964 B = *matout; 965 } 966 967 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 968 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 969 for (i=0; i<m; i++) rwork[i] = rstart + i; 970 for (j=0; j<n; j++) { 971 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 972 v += m; 973 } 974 ierr = PetscFree(rwork);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 978 *matout = B; 979 } else { 980 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 981 } 982 PetscFunctionReturn(0); 983 } 984 985 #include "petscblaslapack.h" 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatScale_MPIDense" 988 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 989 { 990 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 991 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 992 PetscScalar oalpha = alpha; 993 PetscErrorCode ierr; 994 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N); 995 996 PetscFunctionBegin; 997 BLASscal_(&nz,&oalpha,a->v,&one); 998 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1006 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1007 { 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #if defined(PETSC_HAVE_PLAPACK) 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1019 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1020 { 1021 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1022 PetscErrorCode ierr; 1023 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1024 PetscScalar *array; 1025 PetscReal one = 1.0; 1026 1027 PetscFunctionBegin; 1028 /* Copy A into F->lu->A */ 1029 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1030 ierr = PLA_API_begin();CHKERRQ(ierr); 1031 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1032 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1033 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1034 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1035 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1036 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1037 ierr = PLA_API_end();CHKERRQ(ierr); 1038 lu->rstart = rstart; 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1044 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1045 { 1046 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1047 PetscErrorCode ierr; 1048 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1049 PetscScalar *array; 1050 PetscReal one = 1.0; 1051 1052 PetscFunctionBegin; 1053 /* Copy F into A->lu->A */ 1054 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1055 ierr = PLA_API_begin();CHKERRQ(ierr); 1056 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1057 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1058 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1059 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1060 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1061 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1062 ierr = PLA_API_end();CHKERRQ(ierr); 1063 lu->rstart = rstart; 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1069 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1070 { 1071 PetscErrorCode ierr; 1072 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1073 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1074 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1075 PLA_Obj alpha = NULL,beta = NULL; 1076 1077 PetscFunctionBegin; 1078 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1079 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1080 1081 /* 1082 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1083 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1084 */ 1085 1086 /* do the multiply in PLA */ 1087 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1088 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1089 CHKMEMQ; 1090 1091 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1092 CHKMEMQ; 1093 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1094 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1095 1096 /* 1097 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1098 */ 1099 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1105 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1106 { 1107 PetscErrorCode ierr; 1108 PetscInt m=A->rmap->n,n=B->cmap->n; 1109 Mat Cmat; 1110 1111 PetscFunctionBegin; 1112 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1113 SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported"); 1114 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1115 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1116 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1117 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1118 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1119 //ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr); 1120 //ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr); 1121 //ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr); 1122 *C = Cmat; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1128 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1129 { 1130 PetscErrorCode ierr; 1131 1132 PetscFunctionBegin; 1133 if (scall == MAT_INITIAL_MATRIX){ 1134 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1135 } 1136 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatSolve_MPIDense" 1142 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1143 { 1144 MPI_Comm comm = ((PetscObject)A)->comm; 1145 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1146 PetscErrorCode ierr; 1147 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1148 PetscScalar *array; 1149 PetscReal one = 1.0; 1150 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1151 PLA_Obj v_pla = NULL; 1152 PetscScalar *loc_buf; 1153 Vec loc_x; 1154 1155 PetscFunctionBegin; 1156 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1157 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1158 1159 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1160 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1161 PLA_Obj_set_to_zero(v_pla); 1162 1163 /* Copy b into rhs_pla */ 1164 PLA_API_begin(); 1165 PLA_Obj_API_open(v_pla); 1166 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1167 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1168 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1169 PLA_Obj_API_close(v_pla); 1170 PLA_API_end(); 1171 1172 if (A->factor == MAT_FACTOR_LU){ 1173 /* Apply the permutations to the right hand sides */ 1174 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1175 1176 /* Solve L y = b, overwriting b with y */ 1177 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1178 1179 /* Solve U x = y (=b), overwriting b with x */ 1180 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1181 } else { /*MAT_FACTOR_CHOLESKY */ 1182 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1183 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1184 PLA_NONUNIT_DIAG,lu->A,v_pla); 1185 } 1186 1187 /* Copy PLAPACK x into Petsc vector x */ 1188 PLA_Obj_local_length(v_pla, &loc_m); 1189 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1190 PLA_Obj_local_stride(v_pla, &loc_stride); 1191 /* 1192 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1193 */ 1194 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1195 if (!lu->pla_solved){ 1196 1197 PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc); 1198 PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc); 1199 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1200 1201 /* Create IS and cts for VecScatterring */ 1202 PLA_Obj_local_length(v_pla, &loc_m); 1203 PLA_Obj_local_stride(v_pla, &loc_stride); 1204 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1205 idx_petsc = idx_pla + loc_m; 1206 1207 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1208 for (i=0; i<loc_m; i+=lu->nb){ 1209 j = 0; 1210 while (j < lu->nb && i+j < loc_m){ 1211 idx_petsc[i+j] = rstart + j; j++; 1212 } 1213 rstart += size*lu->nb; 1214 } 1215 1216 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1217 1218 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1219 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1220 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1221 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1222 } 1223 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225 1226 /* Free data */ 1227 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1228 PLA_Obj_free(&v_pla); 1229 1230 lu->pla_solved = PETSC_TRUE; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1236 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1237 { 1238 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1239 PetscErrorCode ierr; 1240 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1241 PetscInt info_pla=0; 1242 PetscScalar *array,one = 1.0; 1243 1244 PetscFunctionBegin; 1245 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1246 PLA_Obj_free(&lu->A); 1247 PLA_Obj_free (&lu->pivots); 1248 } 1249 /* Create PLAPACK matrix object */ 1250 lu->A = NULL; lu->pivots = NULL; 1251 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1252 PLA_Obj_set_to_zero(lu->A); 1253 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1254 1255 /* Copy A into lu->A */ 1256 PLA_API_begin(); 1257 PLA_Obj_API_open(lu->A); 1258 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1259 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1260 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1261 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1262 PLA_Obj_API_close(lu->A); 1263 PLA_API_end(); 1264 1265 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1266 info_pla = PLA_LU(lu->A,lu->pivots); 1267 if (info_pla != 0) 1268 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1269 1270 lu->CleanUpPlapack = PETSC_TRUE; 1271 lu->rstart = rstart; 1272 lu->mstruct = SAME_NONZERO_PATTERN; 1273 1274 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1280 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1281 { 1282 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1283 PetscErrorCode ierr; 1284 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1285 PetscInt info_pla=0; 1286 PetscScalar *array,one = 1.0; 1287 1288 PetscFunctionBegin; 1289 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1290 PLA_Obj_free(&lu->A); 1291 } 1292 /* Create PLAPACK matrix object */ 1293 lu->A = NULL; 1294 lu->pivots = NULL; 1295 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1296 1297 /* Copy A into lu->A */ 1298 PLA_API_begin(); 1299 PLA_Obj_API_open(lu->A); 1300 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1301 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1302 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1303 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1304 PLA_Obj_API_close(lu->A); 1305 PLA_API_end(); 1306 1307 /* Factor P A -> Chol */ 1308 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1309 if (info_pla != 0) 1310 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1311 1312 lu->CleanUpPlapack = PETSC_TRUE; 1313 lu->rstart = rstart; 1314 lu->mstruct = SAME_NONZERO_PATTERN; 1315 1316 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatFactorSymbolic_Plapack_Private" 1322 PetscErrorCode MatFactorSymbolic_Plapack_Private(Mat A,MatFactorInfo *info,Mat *F) 1323 { 1324 Mat B = *F; 1325 Mat_Plapack *lu; 1326 PetscErrorCode ierr; 1327 PetscInt M=A->rmap->N; 1328 MPI_Comm comm=((PetscObject)A)->comm,comm_2d; 1329 PetscMPIInt size; 1330 PetscInt ierror; 1331 1332 PetscFunctionBegin; 1333 lu = (Mat_Plapack*)(B->spptr); 1334 1335 /* Set default Plapack parameters */ 1336 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1337 lu->nprows = 1; lu->npcols = size; 1338 ierror = 0; 1339 lu->nb = M/size; 1340 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1341 1342 /* Set runtime options */ 1343 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1344 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr); 1345 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr); 1346 1347 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1348 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr); 1349 if (ierror){ 1350 PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE ); 1351 } else { 1352 PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE ); 1353 } 1354 lu->ierror = ierror; 1355 1356 lu->nb_alg = 0; 1357 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr); 1358 if (lu->nb_alg){ 1359 pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg); 1360 } 1361 PetscOptionsEnd(); 1362 1363 1364 /* Create a 2D communicator */ 1365 PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d); 1366 lu->comm_2d = comm_2d; 1367 1368 /* Initialize PLAPACK */ 1369 PLA_Init(comm_2d); 1370 1371 /* Create object distribution template */ 1372 lu->templ = NULL; 1373 PLA_Temp_create(lu->nb, 0, &lu->templ); 1374 1375 /* Use suggested nb_alg if it is not provided by user */ 1376 if (lu->nb_alg == 0){ 1377 PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg); 1378 pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg); 1379 } 1380 1381 /* Set the datatype */ 1382 #if defined(PETSC_USE_COMPLEX) 1383 lu->datatype = MPI_DOUBLE_COMPLEX; 1384 #else 1385 lu->datatype = MPI_DOUBLE; 1386 #endif 1387 1388 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1389 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1390 lu->CleanUpPlapack = PETSC_TRUE; 1391 *F = B; 1392 PetscFunctionReturn(0); 1393 } 1394 1395 /* Note the Petsc perm permutation is ignored */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1398 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1399 { 1400 PetscErrorCode ierr; 1401 PetscTruth issymmetric,set; 1402 1403 PetscFunctionBegin; 1404 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1405 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1406 ierr = MatFactorSymbolic_Plapack_Private(A,info,F);CHKERRQ(ierr); 1407 (*F)->factor = MAT_FACTOR_CHOLESKY; 1408 PetscFunctionReturn(0); 1409 } 1410 1411 /* Note the Petsc r and c permutations are ignored */ 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1414 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1415 { 1416 PetscErrorCode ierr; 1417 PetscInt M = A->rmap->N; 1418 Mat_Plapack *lu; 1419 1420 PetscFunctionBegin; 1421 ierr = MatFactorSymbolic_Plapack_Private(A,info,F);CHKERRQ(ierr); 1422 lu = (Mat_Plapack*)(*F)->spptr; 1423 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1424 (*F)->factor = MAT_FACTOR_LU; 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1430 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1431 { 1432 PetscErrorCode ierr; 1433 Mat_Plapack *lu; 1434 PetscMPIInt size; 1435 PetscInt M; 1436 1437 PetscFunctionBegin; 1438 /* Create the factorization matrix */ 1439 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1440 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1441 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1442 ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr); 1443 A->spptr = (void*)lu; 1444 1445 lu = (Mat_Plapack*)(A->spptr); 1446 1447 /* Set default Plapack parameters */ 1448 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1449 lu->nb = M/size; 1450 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1451 1452 /* Set runtime options */ 1453 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1454 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1455 PetscOptionsEnd(); 1456 1457 /* Create object distribution template */ 1458 lu->templ = NULL; 1459 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1460 1461 /* Set the datatype */ 1462 #if defined(PETSC_USE_COMPLEX) 1463 lu->datatype = MPI_DOUBLE_COMPLEX; 1464 #else 1465 lu->datatype = MPI_DOUBLE; 1466 #endif 1467 1468 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1469 1470 1471 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1472 1473 if (ftype == MAT_FACTOR_LU) { 1474 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1475 (*F)->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1476 (*F)->ops->solve = MatSolve_MPIDense; 1477 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1478 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1479 (*F)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1480 (*F)->ops->solve = MatSolve_MPIDense; 1481 } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1482 1483 PetscFunctionReturn(0); 1484 } 1485 #endif 1486 1487 /* -------------------------------------------------------------------*/ 1488 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1489 MatGetRow_MPIDense, 1490 MatRestoreRow_MPIDense, 1491 MatMult_MPIDense, 1492 /* 4*/ MatMultAdd_MPIDense, 1493 MatMultTranspose_MPIDense, 1494 MatMultTransposeAdd_MPIDense, 1495 #if defined(PETSC_HAVE_PLAPACK) 1496 MatSolve_MPIDense, 1497 #else 1498 0, 1499 #endif 1500 0, 1501 0, 1502 /*10*/ 0, 1503 0, 1504 0, 1505 0, 1506 MatTranspose_MPIDense, 1507 /*15*/ MatGetInfo_MPIDense, 1508 MatEqual_MPIDense, 1509 MatGetDiagonal_MPIDense, 1510 MatDiagonalScale_MPIDense, 1511 MatNorm_MPIDense, 1512 /*20*/ MatAssemblyBegin_MPIDense, 1513 MatAssemblyEnd_MPIDense, 1514 0, 1515 MatSetOption_MPIDense, 1516 MatZeroEntries_MPIDense, 1517 /*25*/ MatZeroRows_MPIDense, 1518 #if defined(PETSC_HAVE_PLAPACK) 1519 MatLUFactorSymbolic_MPIDense, 1520 MatLUFactorNumeric_MPIDense, 1521 MatCholeskyFactorSymbolic_MPIDense, 1522 MatCholeskyFactorNumeric_MPIDense, 1523 #else 1524 0, 1525 0, 1526 0, 1527 0, 1528 #endif 1529 /*30*/ MatSetUpPreallocation_MPIDense, 1530 0, 1531 0, 1532 MatGetArray_MPIDense, 1533 MatRestoreArray_MPIDense, 1534 /*35*/ MatDuplicate_MPIDense, 1535 0, 1536 0, 1537 0, 1538 0, 1539 /*40*/ 0, 1540 MatGetSubMatrices_MPIDense, 1541 0, 1542 MatGetValues_MPIDense, 1543 0, 1544 /*45*/ 0, 1545 MatScale_MPIDense, 1546 0, 1547 0, 1548 0, 1549 /*50*/ 0, 1550 0, 1551 0, 1552 0, 1553 0, 1554 /*55*/ 0, 1555 0, 1556 0, 1557 0, 1558 0, 1559 /*60*/ MatGetSubMatrix_MPIDense, 1560 MatDestroy_MPIDense, 1561 MatView_MPIDense, 1562 0, 1563 0, 1564 /*65*/ 0, 1565 0, 1566 0, 1567 0, 1568 0, 1569 /*70*/ 0, 1570 0, 1571 0, 1572 0, 1573 0, 1574 /*75*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*80*/ 0, 1580 0, 1581 0, 1582 0, 1583 /*84*/ MatLoad_MPIDense, 1584 0, 1585 0, 1586 0, 1587 0, 1588 0, 1589 /*90*/ 1590 #if defined(PETSC_HAVE_PLAPACK) 1591 MatMatMult_MPIDense_MPIDense, 1592 MatMatMultSymbolic_MPIDense_MPIDense, 1593 MatMatMultNumeric_MPIDense_MPIDense, 1594 #else 1595 0, 1596 0, 1597 0, 1598 #endif 1599 0, 1600 /*95*/ 0, 1601 0, 1602 0, 1603 0}; 1604 1605 EXTERN_C_BEGIN 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1608 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1609 { 1610 Mat_MPIDense *a; 1611 PetscErrorCode ierr; 1612 1613 PetscFunctionBegin; 1614 mat->preallocated = PETSC_TRUE; 1615 /* Note: For now, when data is specified above, this assumes the user correctly 1616 allocates the local dense storage space. We should add error checking. */ 1617 1618 a = (Mat_MPIDense*)mat->data; 1619 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1620 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1621 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1622 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1623 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1624 PetscFunctionReturn(0); 1625 } 1626 EXTERN_C_END 1627 1628 /*MC 1629 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1630 1631 Options Database Keys: 1632 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1633 1634 Level: beginner 1635 1636 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1637 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1638 (run config/configure.py with the option --download-plapack) 1639 1640 1641 Options Database Keys: 1642 . -mat_plapack_nprows <n> - number of rows in processor partition 1643 . -mat_plapack_npcols <n> - number of columns in processor partition 1644 . -mat_plapack_nb <n> - block size of template vector 1645 . -mat_plapack_nb_alg <n> - algorithmic block size 1646 - -mat_plapack_ckerror <n> - error checking flag 1647 1648 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1649 M*/ 1650 1651 EXTERN_C_BEGIN 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatCreate_MPIDense" 1654 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1655 { 1656 Mat_MPIDense *a; 1657 PetscErrorCode ierr; 1658 1659 PetscFunctionBegin; 1660 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1661 mat->data = (void*)a; 1662 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1663 mat->mapping = 0; 1664 1665 mat->insertmode = NOT_SET_VALUES; 1666 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1667 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1668 1669 mat->rmap->bs = mat->cmap->bs = 1; 1670 ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr); 1671 ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr); 1672 a->nvec = mat->cmap->n; 1673 1674 /* build cache for off array entries formed */ 1675 a->donotstash = PETSC_FALSE; 1676 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1677 1678 /* stuff used for matrix vector multiply */ 1679 a->lvec = 0; 1680 a->Mvctx = 0; 1681 a->roworiented = PETSC_TRUE; 1682 1683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1684 "MatGetDiagonalBlock_MPIDense", 1685 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1687 "MatMPIDenseSetPreallocation_MPIDense", 1688 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1689 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1690 "MatMatMult_MPIAIJ_MPIDense", 1691 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1692 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1693 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1694 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1696 "MatMatMultNumeric_MPIAIJ_MPIDense", 1697 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1698 #if defined(PETSC_HAVE_PLAPACK) 1699 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C", 1700 "MatGetFactor_mpidense_plapack", 1701 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1702 #endif 1703 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1704 1705 PetscFunctionReturn(0); 1706 } 1707 EXTERN_C_END 1708 1709 /*MC 1710 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1711 1712 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1713 and MATMPIDENSE otherwise. 1714 1715 Options Database Keys: 1716 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1717 1718 Level: beginner 1719 1720 1721 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1722 M*/ 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatCreate_Dense" 1727 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1728 { 1729 PetscErrorCode ierr; 1730 PetscMPIInt size; 1731 1732 PetscFunctionBegin; 1733 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1734 if (size == 1) { 1735 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1736 } else { 1737 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1738 } 1739 PetscFunctionReturn(0); 1740 } 1741 EXTERN_C_END 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1745 /*@C 1746 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1747 1748 Not collective 1749 1750 Input Parameters: 1751 . A - the matrix 1752 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1753 to control all matrix memory allocation. 1754 1755 Notes: 1756 The dense format is fully compatible with standard Fortran 77 1757 storage by columns. 1758 1759 The data input variable is intended primarily for Fortran programmers 1760 who wish to allocate their own matrix memory space. Most users should 1761 set data=PETSC_NULL. 1762 1763 Level: intermediate 1764 1765 .keywords: matrix,dense, parallel 1766 1767 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1768 @*/ 1769 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1770 { 1771 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1772 1773 PetscFunctionBegin; 1774 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1775 if (f) { 1776 ierr = (*f)(mat,data);CHKERRQ(ierr); 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatCreateMPIDense" 1783 /*@C 1784 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1785 1786 Collective on MPI_Comm 1787 1788 Input Parameters: 1789 + comm - MPI communicator 1790 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1791 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1792 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1793 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1794 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1795 to control all matrix memory allocation. 1796 1797 Output Parameter: 1798 . A - the matrix 1799 1800 Notes: 1801 The dense format is fully compatible with standard Fortran 77 1802 storage by columns. 1803 1804 The data input variable is intended primarily for Fortran programmers 1805 who wish to allocate their own matrix memory space. Most users should 1806 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1807 1808 The user MUST specify either the local or global matrix dimensions 1809 (possibly both). 1810 1811 Level: intermediate 1812 1813 .keywords: matrix,dense, parallel 1814 1815 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1816 @*/ 1817 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1818 { 1819 PetscErrorCode ierr; 1820 PetscMPIInt size; 1821 1822 PetscFunctionBegin; 1823 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1824 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1825 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1826 if (size > 1) { 1827 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1828 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1829 } else { 1830 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1831 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatDuplicate_MPIDense" 1838 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1839 { 1840 Mat mat; 1841 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 *newmat = 0; 1846 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1847 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1848 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1849 a = (Mat_MPIDense*)mat->data; 1850 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1851 mat->factor = A->factor; 1852 mat->assembled = PETSC_TRUE; 1853 mat->preallocated = PETSC_TRUE; 1854 1855 mat->rmap->rstart = A->rmap->rstart; 1856 mat->rmap->rend = A->rmap->rend; 1857 a->size = oldmat->size; 1858 a->rank = oldmat->rank; 1859 mat->insertmode = NOT_SET_VALUES; 1860 a->nvec = oldmat->nvec; 1861 a->donotstash = oldmat->donotstash; 1862 1863 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1864 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1865 ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr); 1866 1867 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1868 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1869 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1870 1871 #if defined(PETSC_HAVE_PLAPACK) 1872 ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr); 1873 #endif 1874 *newmat = mat; 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #include "petscsys.h" 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1882 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1883 { 1884 PetscErrorCode ierr; 1885 PetscMPIInt rank,size; 1886 PetscInt *rowners,i,m,nz,j; 1887 PetscScalar *array,*vals,*vals_ptr; 1888 MPI_Status status; 1889 1890 PetscFunctionBegin; 1891 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1892 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1893 1894 /* determine ownership of all rows */ 1895 m = M/size + ((M % size) > rank); 1896 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1897 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1898 rowners[0] = 0; 1899 for (i=2; i<=size; i++) { 1900 rowners[i] += rowners[i-1]; 1901 } 1902 1903 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1904 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1905 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1906 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1907 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1908 1909 if (!rank) { 1910 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1911 1912 /* read in my part of the matrix numerical values */ 1913 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1914 1915 /* insert into matrix-by row (this is why cannot directly read into array */ 1916 vals_ptr = vals; 1917 for (i=0; i<m; i++) { 1918 for (j=0; j<N; j++) { 1919 array[i + j*m] = *vals_ptr++; 1920 } 1921 } 1922 1923 /* read in other processors and ship out */ 1924 for (i=1; i<size; i++) { 1925 nz = (rowners[i+1] - rowners[i])*N; 1926 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1927 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1928 } 1929 } else { 1930 /* receive numeric values */ 1931 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1932 1933 /* receive message of values*/ 1934 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1935 1936 /* insert into matrix-by row (this is why cannot directly read into array */ 1937 vals_ptr = vals; 1938 for (i=0; i<m; i++) { 1939 for (j=0; j<N; j++) { 1940 array[i + j*m] = *vals_ptr++; 1941 } 1942 } 1943 } 1944 ierr = PetscFree(rowners);CHKERRQ(ierr); 1945 ierr = PetscFree(vals);CHKERRQ(ierr); 1946 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1947 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatLoad_MPIDense" 1953 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1954 { 1955 Mat A; 1956 PetscScalar *vals,*svals; 1957 MPI_Comm comm = ((PetscObject)viewer)->comm; 1958 MPI_Status status; 1959 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1960 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1961 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1962 PetscInt i,nz,j,rstart,rend; 1963 int fd; 1964 PetscErrorCode ierr; 1965 1966 PetscFunctionBegin; 1967 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1968 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1969 if (!rank) { 1970 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1971 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1972 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1973 } 1974 1975 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1976 M = header[1]; N = header[2]; nz = header[3]; 1977 1978 /* 1979 Handle case where matrix is stored on disk as a dense matrix 1980 */ 1981 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1982 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 /* determine ownership of all rows */ 1987 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1988 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1989 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1990 rowners[0] = 0; 1991 for (i=2; i<=size; i++) { 1992 rowners[i] += rowners[i-1]; 1993 } 1994 rstart = rowners[rank]; 1995 rend = rowners[rank+1]; 1996 1997 /* distribute row lengths to all processors */ 1998 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1999 offlens = ourlens + (rend-rstart); 2000 if (!rank) { 2001 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2002 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2003 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2004 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2005 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2006 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2007 } else { 2008 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2009 } 2010 2011 if (!rank) { 2012 /* calculate the number of nonzeros on each processor */ 2013 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2014 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2015 for (i=0; i<size; i++) { 2016 for (j=rowners[i]; j< rowners[i+1]; j++) { 2017 procsnz[i] += rowlengths[j]; 2018 } 2019 } 2020 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2021 2022 /* determine max buffer needed and allocate it */ 2023 maxnz = 0; 2024 for (i=0; i<size; i++) { 2025 maxnz = PetscMax(maxnz,procsnz[i]); 2026 } 2027 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2028 2029 /* read in my part of the matrix column indices */ 2030 nz = procsnz[0]; 2031 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2032 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2033 2034 /* read in every one elses and ship off */ 2035 for (i=1; i<size; i++) { 2036 nz = procsnz[i]; 2037 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2038 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2039 } 2040 ierr = PetscFree(cols);CHKERRQ(ierr); 2041 } else { 2042 /* determine buffer space needed for message */ 2043 nz = 0; 2044 for (i=0; i<m; i++) { 2045 nz += ourlens[i]; 2046 } 2047 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2048 2049 /* receive message of column indices*/ 2050 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2051 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2052 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2053 } 2054 2055 /* loop over local rows, determining number of off diagonal entries */ 2056 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2057 jj = 0; 2058 for (i=0; i<m; i++) { 2059 for (j=0; j<ourlens[i]; j++) { 2060 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2061 jj++; 2062 } 2063 } 2064 2065 /* create our matrix */ 2066 for (i=0; i<m; i++) { 2067 ourlens[i] -= offlens[i]; 2068 } 2069 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2070 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2071 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2072 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2073 A = *newmat; 2074 for (i=0; i<m; i++) { 2075 ourlens[i] += offlens[i]; 2076 } 2077 2078 if (!rank) { 2079 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2080 2081 /* read in my part of the matrix numerical values */ 2082 nz = procsnz[0]; 2083 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2084 2085 /* insert into matrix */ 2086 jj = rstart; 2087 smycols = mycols; 2088 svals = vals; 2089 for (i=0; i<m; i++) { 2090 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2091 smycols += ourlens[i]; 2092 svals += ourlens[i]; 2093 jj++; 2094 } 2095 2096 /* read in other processors and ship out */ 2097 for (i=1; i<size; i++) { 2098 nz = procsnz[i]; 2099 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2100 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2101 } 2102 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2103 } else { 2104 /* receive numeric values */ 2105 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2106 2107 /* receive message of values*/ 2108 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2109 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2110 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2111 2112 /* insert into matrix */ 2113 jj = rstart; 2114 smycols = mycols; 2115 svals = vals; 2116 for (i=0; i<m; i++) { 2117 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2118 smycols += ourlens[i]; 2119 svals += ourlens[i]; 2120 jj++; 2121 } 2122 } 2123 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2124 ierr = PetscFree(vals);CHKERRQ(ierr); 2125 ierr = PetscFree(mycols);CHKERRQ(ierr); 2126 ierr = PetscFree(rowners);CHKERRQ(ierr); 2127 2128 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2129 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 #undef __FUNCT__ 2134 #define __FUNCT__ "MatEqual_MPIDense" 2135 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2136 { 2137 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2138 Mat a,b; 2139 PetscTruth flg; 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 a = matA->A; 2144 b = matB->A; 2145 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2146 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #if defined(PETSC_HAVE_PLAPACK) 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2154 /*@C 2155 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2156 Level: developer 2157 2158 .keywords: Petsc, destroy, package, PLAPACK 2159 .seealso: PetscFinalize() 2160 @*/ 2161 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2162 { 2163 PetscErrorCode ierr; 2164 2165 PetscFunctionBegin; 2166 ierr = PLA_Finalize();CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2172 /*@C 2173 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2174 called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize() 2175 when using static libraries. 2176 2177 Input Parameter: 2178 path - The dynamic library path, or PETSC_NULL 2179 2180 Level: developer 2181 2182 .keywords: Petsc, initialize, package, PLAPACK 2183 .seealso: PetscInitializePackage(), PetscInitialize() 2184 @*/ 2185 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[]) 2186 { 2187 MPI_Comm comm = PETSC_COMM_WORLD; 2188 PetscMPIInt size; 2189 PetscErrorCode ierr; 2190 2191 PetscFunctionBegin; 2192 if (!PLA_Initialized(PETSC_NULL)) { 2193 2194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2195 Plapack_nprows = 1; 2196 Plapack_npcols = size; 2197 2198 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2199 ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2200 ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2201 #if defined(PETSC_USE_DEBUG) 2202 Plapack_ierror = 3; 2203 #else 2204 Plapack_ierror = 0; 2205 #endif 2206 ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2207 if (Plapack_ierror){ 2208 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2209 } else { 2210 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2211 } 2212 2213 Plapack_nb_alg = 0; 2214 ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2215 if (Plapack_nb_alg) { 2216 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2217 } 2218 PetscOptionsEnd(); 2219 2220 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2221 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2222 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2223 } 2224 PetscFunctionReturn(0); 2225 } 2226 2227 2228 #endif 2229