1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 PetscInt Plapack_nprows,Plapack_npcols; 10 MPI_Comm Plapack_comm_2d; 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatDenseGetLocalMatrix" 14 /*@ 15 16 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 17 matrix that represents the operator. For sequential matrices it returns itself. 18 19 Input Parameter: 20 . A - the Seq or MPI dense matrix 21 22 Output Parameter: 23 . B - the inner matrix 24 25 Level: intermediate 26 27 @*/ 28 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 29 { 30 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 31 PetscErrorCode ierr; 32 PetscTruth flg; 33 34 PetscFunctionBegin; 35 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 36 if (flg) { 37 *B = mat->A; 38 } else { 39 *B = A; 40 } 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatGetRow_MPIDense" 46 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 47 { 48 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 49 PetscErrorCode ierr; 50 PetscInt lrow,rstart = A->rmap.rstart,rend = A->rmap.rend; 51 52 PetscFunctionBegin; 53 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 54 lrow = row - rstart; 55 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatRestoreRow_MPIDense" 61 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 62 { 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 67 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 68 PetscFunctionReturn(0); 69 } 70 71 EXTERN_C_BEGIN 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 74 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 75 { 76 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 77 PetscErrorCode ierr; 78 PetscInt m = A->rmap.n,rstart = A->rmap.rstart; 79 PetscScalar *array; 80 MPI_Comm comm; 81 82 PetscFunctionBegin; 83 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 84 85 /* The reuse aspect is not implemented efficiently */ 86 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 87 88 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 89 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 90 ierr = MatCreate(comm,B);CHKERRQ(ierr); 91 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 92 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 93 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 94 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 95 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 97 98 *iscopy = PETSC_TRUE; 99 PetscFunctionReturn(0); 100 } 101 EXTERN_C_END 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatSetValues_MPIDense" 105 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 106 { 107 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 108 PetscErrorCode ierr; 109 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 110 PetscTruth roworiented = A->roworiented; 111 112 
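  /* Rows owned by this process (rstart <= row < rend) are inserted directly into the local
     sequential dense block A->A; rows owned by other processes are cached in mat->stash
     (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and shipped to their owners during
     MatAssemblyBegin()/MatAssemblyEnd(). */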
PetscFunctionBegin; 113 for (i=0; i<m; i++) { 114 if (idxm[i] < 0) continue; 115 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 116 if (idxm[i] >= rstart && idxm[i] < rend) { 117 row = idxm[i] - rstart; 118 if (roworiented) { 119 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 120 } else { 121 for (j=0; j<n; j++) { 122 if (idxn[j] < 0) continue; 123 if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 124 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 125 } 126 } 127 } else { 128 if (!A->donotstash) { 129 if (roworiented) { 130 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 131 } else { 132 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 133 } 134 } 135 } 136 } 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatGetValues_MPIDense" 142 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 143 { 144 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 145 PetscErrorCode ierr; 146 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 147 148 PetscFunctionBegin; 149 for (i=0; i<m; i++) { 150 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 151 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 152 if (idxm[i] >= rstart && idxm[i] < rend) { 153 row = idxm[i] - rstart; 154 for (j=0; j<n; j++) { 155 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 156 if (idxn[j] >= mat->cmap.N) { 157 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 158 } 159 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 160 } 161 } else { 162 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 163 } 164 } 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatGetArray_MPIDense" 170 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 182 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 183 { 184 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 185 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 186 PetscErrorCode ierr; 187 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 188 PetscScalar *av,*bv,*v = lmat->v; 189 Mat newmat; 190 191 PetscFunctionBegin; 192 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 193 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 194 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 195 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 196 197 /* No parallel redistribution currently supported! Should really check each index set 198 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 199 original matrix! */ 200 201 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 202 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 203 204 /* Check submatrix call */ 205 if (scall == MAT_REUSE_MATRIX) { 206 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 207 /* Really need to test rows and column sizes! 
*/ 208 newmat = *B; 209 } else { 210 /* Create and fill new matrix */ 211 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 212 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 213 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 214 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 215 } 216 217 /* Now extract the data pointers and do the copy, column at a time */ 218 newmatd = (Mat_MPIDense*)newmat->data; 219 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 220 221 for (i=0; i<ncols; i++) { 222 av = v + nlrows*icol[i]; 223 for (j=0; j<nrows; j++) { 224 *bv++ = av[irow[j] - rstart]; 225 } 226 } 227 228 /* Assemble the matrices so that the correct flags are set */ 229 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 232 /* Free work space */ 233 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 234 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 235 *B = newmat; 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatRestoreArray_MPIDense" 241 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 242 { 243 PetscFunctionBegin; 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 250 { 251 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 252 MPI_Comm comm = ((PetscObject)mat)->comm; 253 PetscErrorCode ierr; 254 PetscInt nstash,reallocs; 255 InsertMode addv; 256 257 PetscFunctionBegin; 258 /* make sure all processors are either in INSERTMODE or ADDMODE */ 259 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 260 if (addv == (ADD_VALUES|INSERT_VALUES)) { 261 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 } 263 mat->insertmode = addv; /* in case this processor had no cache */ 264 265 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 266 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 267 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 273 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 274 { 275 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,*row,*col,flg,j,rstart,ncols; 278 PetscMPIInt n; 279 PetscScalar *val; 280 InsertMode addv=mat->insertmode; 281 282 PetscFunctionBegin; 283 /* wait on receives */ 284 while (1) { 285 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 286 if (!flg) break; 287 288 for (i=0; i<n;) { 289 /* Now identify the consecutive vals belonging to the same row */ 290 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 291 if (j < n) ncols = j-i; 292 else ncols = n-i; 293 /* Now assemble all these values with a single function call */ 294 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 295 i = j; 296 } 297 } 298 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 299 300 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 302 303 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 304 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 305 } 306 
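  /* MatSetUpMultiply_MPIDense() builds the work vector mdn->lvec and the scatter mdn->Mvctx
     that gather the entire input vector onto each process; they are reused by the MatMult
     kernels below, so they only need to be created on the first final assembly. */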
PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatZeroEntries_MPIDense" 311 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 312 { 313 PetscErrorCode ierr; 314 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 315 316 PetscFunctionBegin; 317 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 /* the code does not do the diagonal entries correctly unless the 322 matrix is square and the column and row owerships are identical. 323 This is a BUG. The only way to fix it seems to be to access 324 mdn->A and mdn->B directly and not through the MatZeroRows() 325 routine. 326 */ 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroRows_MPIDense" 329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 330 { 331 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 PetscInt i,*owners = A->rmap.range; 334 PetscInt *nprocs,j,idx,nsends; 335 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 336 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 337 PetscInt *lens,*lrows,*values; 338 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 339 MPI_Comm comm = ((PetscObject)A)->comm; 340 MPI_Request *send_waits,*recv_waits; 341 MPI_Status recv_status,*send_status; 342 PetscTruth found; 343 344 PetscFunctionBegin; 345 /* first count number of contributors to each processor */ 346 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 347 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 348 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 349 for (i=0; i<N; i++) { 350 idx = rows[i]; 351 found = PETSC_FALSE; 352 for (j=0; j<size; j++) { 353 if (idx >= owners[j] && idx < owners[j+1]) { 354 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 355 } 356 } 357 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 358 } 359 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 360 361 /* inform other processors of number of messages and max length*/ 362 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 363 364 /* post receives: */ 365 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 366 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 367 for (i=0; i<nrecvs; i++) { 368 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 369 } 370 371 /* do sends: 372 1) starts[i] gives the starting index in svalues for stuff going to 373 the ith processor 374 */ 375 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 376 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 377 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 378 starts[0] = 0; 379 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 380 for (i=0; i<N; i++) { 381 svalues[starts[owner[i]]++] = rows[i]; 382 } 383 384 starts[0] = 0; 385 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 386 count = 0; 387 for (i=0; i<size; i++) { 388 if (nprocs[2*i+1]) { 389 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 390 } 391 } 392 ierr = PetscFree(starts);CHKERRQ(ierr); 393 394 base = owners[rank]; 395 396 /* wait on receives */ 397 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 398 source = lens + nrecvs; 399 count = nrecvs; slen = 0; 400 while (count) { 401 ierr = 
MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 402 /* unpack receives into our local space */ 403 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 404 source[imdex] = recv_status.MPI_SOURCE; 405 lens[imdex] = n; 406 slen += n; 407 count--; 408 } 409 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 410 411 /* move the data into the send scatter */ 412 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 413 count = 0; 414 for (i=0; i<nrecvs; i++) { 415 values = rvalues + i*nmax; 416 for (j=0; j<lens[i]; j++) { 417 lrows[count++] = values[j] - base; 418 } 419 } 420 ierr = PetscFree(rvalues);CHKERRQ(ierr); 421 ierr = PetscFree(lens);CHKERRQ(ierr); 422 ierr = PetscFree(owner);CHKERRQ(ierr); 423 ierr = PetscFree(nprocs);CHKERRQ(ierr); 424 425 /* actually zap the local rows */ 426 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 427 ierr = PetscFree(lrows);CHKERRQ(ierr); 428 429 /* wait on sends */ 430 if (nsends) { 431 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 432 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 433 ierr = PetscFree(send_status);CHKERRQ(ierr); 434 } 435 ierr = PetscFree(send_waits);CHKERRQ(ierr); 436 ierr = PetscFree(svalues);CHKERRQ(ierr); 437 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatMult_MPIDense" 443 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 444 { 445 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 446 PetscErrorCode ierr; 447 448 PetscFunctionBegin; 449 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 450 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 451 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatMultAdd_MPIDense" 457 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 458 { 459 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 460 PetscErrorCode ierr; 461 462 PetscFunctionBegin; 463 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 464 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMultTranspose_MPIDense" 471 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 472 { 473 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 474 PetscErrorCode ierr; 475 PetscScalar zero = 0.0; 476 477 PetscFunctionBegin; 478 ierr = VecSet(yy,zero);CHKERRQ(ierr); 479 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 480 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 481 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 487 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 488 { 489 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 490 PetscErrorCode ierr; 491 492 PetscFunctionBegin; 493 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 494 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 
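/*
   A minimal usage sketch (illustration only, not part of the library): the mult kernels above
   rely on the scatter mdn->Mvctx to gather the entire input vector into mdn->lvec before the
   local sequential product is applied.  From application code, with global sizes M and N chosen
   by the caller, this looks like

     Mat A;
     Vec x,y;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ... fill A with MatSetValues() and assemble it ...
     ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);      --> dispatches to MatMult_MPIDense() above
*/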
500 #undef __FUNCT__ 501 #define __FUNCT__ "MatGetDiagonal_MPIDense" 502 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 506 PetscErrorCode ierr; 507 PetscInt len,i,n,m = A->rmap.n,radd; 508 PetscScalar *x,zero = 0.0; 509 510 PetscFunctionBegin; 511 ierr = VecSet(v,zero);CHKERRQ(ierr); 512 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 513 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 514 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 515 len = PetscMin(a->A->rmap.n,a->A->cmap.n); 516 radd = A->rmap.rstart*m; 517 for (i=0; i<len; i++) { 518 x[i] = aloc->v[radd + i*m + i]; 519 } 520 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatDestroy_MPIDense" 526 PetscErrorCode MatDestroy_MPIDense(Mat mat) 527 { 528 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 529 PetscErrorCode ierr; 530 #if defined(PETSC_HAVE_PLAPACK) 531 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 532 #endif 533 534 PetscFunctionBegin; 535 536 #if defined(PETSC_USE_LOG) 537 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 538 #endif 539 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 540 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 541 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 542 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 543 #if defined(PETSC_HAVE_PLAPACK) 544 if (lu->CleanUpPlapack) { 545 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 546 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 547 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 548 549 550 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 551 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 552 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 553 } 554 #endif 555 556 ierr = PetscFree(mdn);CHKERRQ(ierr); 557 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 558 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 559 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 560 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_MPIDense_Binary" 568 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 569 { 570 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 571 PetscErrorCode ierr; 572 PetscViewerFormat format; 573 int fd; 574 PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k; 575 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 576 PetscScalar *work,*v,*vv; 577 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 578 MPI_Status status; 579 580 PetscFunctionBegin; 581 if (mdn->size == 1) { 582 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 583 } else { 584 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 585 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 586 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 587 588 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 589 if (format == 
PETSC_VIEWER_BINARY_NATIVE) { 590 591 if (!rank) { 592 /* store the matrix as a dense matrix */ 593 header[0] = MAT_FILE_COOKIE; 594 header[1] = mat->rmap.N; 595 header[2] = N; 596 header[3] = MATRIX_BINARY_FORMAT_DENSE; 597 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 598 599 /* get largest work array needed for transposing array */ 600 mmax = mat->rmap.n; 601 for (i=1; i<size; i++) { 602 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 603 } 604 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 605 606 /* write out local array, by rows */ 607 m = mat->rmap.n; 608 v = a->v; 609 for (j=0; j<N; j++) { 610 for (i=0; i<m; i++) { 611 work[j + i*N] = *v++; 612 } 613 } 614 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 615 /* get largest work array to receive messages from other processes, excludes process zero */ 616 mmax = 0; 617 for (i=1; i<size; i++) { 618 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 619 } 620 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 621 v = vv; 622 for (k=1; k<size; k++) { 623 m = mat->rmap.range[k+1] - mat->rmap.range[k]; 624 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 625 626 for (j=0; j<N; j++) { 627 for (i=0; i<m; i++) { 628 work[j + i*N] = *v++; 629 } 630 } 631 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 632 } 633 ierr = PetscFree(work);CHKERRQ(ierr); 634 ierr = PetscFree(vv);CHKERRQ(ierr); 635 } else { 636 ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 637 } 638 } 639 } 640 PetscFunctionReturn(0); 641 } 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 645 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 646 { 647 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 648 PetscErrorCode ierr; 649 PetscMPIInt size = mdn->size,rank = mdn->rank; 650 PetscViewerType vtype; 651 PetscTruth iascii,isdraw; 652 PetscViewer sviewer; 653 PetscViewerFormat format; 654 #if defined(PETSC_HAVE_PLAPACK) 655 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 656 #endif 657 658 PetscFunctionBegin; 659 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 660 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 661 if (iascii) { 662 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 663 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 664 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 665 MatInfo info; 666 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 667 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n, 668 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 669 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 670 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 671 #if defined(PETSC_HAVE_PLAPACK) 672 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 673 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 674 ierr = PetscViewerASCIIPrintf(viewer," Distr. 
block size nb: %d \n",lu->nb);CHKERRQ(ierr); 675 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",lu->ierror);CHKERRQ(ierr); 676 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr); 677 #endif 678 PetscFunctionReturn(0); 679 } else if (format == PETSC_VIEWER_ASCII_INFO) { 680 PetscFunctionReturn(0); 681 } 682 } else if (isdraw) { 683 PetscDraw draw; 684 PetscTruth isnull; 685 686 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 687 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 688 if (isnull) PetscFunctionReturn(0); 689 } 690 691 if (size == 1) { 692 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 693 } else { 694 /* assemble the entire matrix onto first processor. */ 695 Mat A; 696 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz; 697 PetscInt *cols; 698 PetscScalar *vals; 699 700 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 701 if (!rank) { 702 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 703 } else { 704 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 705 } 706 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 707 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 708 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 709 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 710 711 /* Copy the matrix ... This isn't the most efficient means, 712 but it's quick for now */ 713 A->insertmode = INSERT_VALUES; 714 row = mat->rmap.rstart; m = mdn->A->rmap.n; 715 for (i=0; i<m; i++) { 716 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 717 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 718 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 719 row++; 720 } 721 722 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 724 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 727 } 728 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 729 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 730 ierr = MatDestroy(A);CHKERRQ(ierr); 731 } 732 PetscFunctionReturn(0); 733 } 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatView_MPIDense" 737 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 738 { 739 PetscErrorCode ierr; 740 PetscTruth iascii,isbinary,isdraw,issocket; 741 742 PetscFunctionBegin; 743 744 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 745 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 746 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 747 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 748 749 if (iascii || issocket || isdraw) { 750 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 751 } else if (isbinary) { 752 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 753 } else { 754 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 755 } 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatGetInfo_MPIDense" 761 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 762 { 763 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 764 Mat mdn = mat->A; 765 PetscErrorCode ierr; 766 PetscReal 
isend[5],irecv[5]; 767 768 PetscFunctionBegin; 769 info->rows_global = (double)A->rmap.N; 770 info->columns_global = (double)A->cmap.N; 771 info->rows_local = (double)A->rmap.n; 772 info->columns_local = (double)A->cmap.N; 773 info->block_size = 1.0; 774 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 775 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 776 isend[3] = info->memory; isend[4] = info->mallocs; 777 if (flag == MAT_LOCAL) { 778 info->nz_used = isend[0]; 779 info->nz_allocated = isend[1]; 780 info->nz_unneeded = isend[2]; 781 info->memory = isend[3]; 782 info->mallocs = isend[4]; 783 } else if (flag == MAT_GLOBAL_MAX) { 784 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 785 info->nz_used = irecv[0]; 786 info->nz_allocated = irecv[1]; 787 info->nz_unneeded = irecv[2]; 788 info->memory = irecv[3]; 789 info->mallocs = irecv[4]; 790 } else if (flag == MAT_GLOBAL_SUM) { 791 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 792 info->nz_used = irecv[0]; 793 info->nz_allocated = irecv[1]; 794 info->nz_unneeded = irecv[2]; 795 info->memory = irecv[3]; 796 info->mallocs = irecv[4]; 797 } 798 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 799 info->fill_ratio_needed = 0; 800 info->factor_mallocs = 0; 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSetOption_MPIDense" 806 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 807 { 808 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 809 PetscErrorCode ierr; 810 811 PetscFunctionBegin; 812 switch (op) { 813 case MAT_NEW_NONZERO_LOCATIONS: 814 case MAT_NEW_NONZERO_LOCATION_ERR: 815 case MAT_NEW_NONZERO_ALLOCATION_ERR: 816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 817 break; 818 case MAT_ROW_ORIENTED: 819 a->roworiented = flg; 820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 821 break; 822 case MAT_NEW_DIAGONALS: 823 case MAT_USE_HASH_TABLE: 824 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 825 break; 826 case MAT_IGNORE_OFF_PROC_ENTRIES: 827 a->donotstash = flg; 828 break; 829 case MAT_SYMMETRIC: 830 case MAT_STRUCTURALLY_SYMMETRIC: 831 case MAT_HERMITIAN: 832 case MAT_SYMMETRY_ETERNAL: 833 case MAT_IGNORE_LOWER_TRIANGULAR: 834 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 835 break; 836 default: 837 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 838 } 839 PetscFunctionReturn(0); 840 } 841 842 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatDiagonalScale_MPIDense" 845 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 846 { 847 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 848 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 849 PetscScalar *l,*r,x,*v; 850 PetscErrorCode ierr; 851 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n; 852 853 PetscFunctionBegin; 854 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 855 if (ll) { 856 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 857 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 858 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 859 for (i=0; i<m; i++) { 860 x = l[i]; 861 v = mat->v + i; 862 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 863 } 864 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 865 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 866 } 867 if (rr) { 868 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 869 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right 
scaling vec non-conforming local size, %d != %d.", s3a, s3); 870 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 873 for (i=0; i<n; i++) { 874 x = r[i]; 875 v = mat->v + i*m; 876 for (j=0; j<m; j++) { (*v++) *= x;} 877 } 878 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 879 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 880 } 881 PetscFunctionReturn(0); 882 } 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatNorm_MPIDense" 886 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 887 { 888 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 889 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 890 PetscErrorCode ierr; 891 PetscInt i,j; 892 PetscReal sum = 0.0; 893 PetscScalar *v = mat->v; 894 895 PetscFunctionBegin; 896 if (mdn->size == 1) { 897 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 898 } else { 899 if (type == NORM_FROBENIUS) { 900 for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) { 901 #if defined(PETSC_USE_COMPLEX) 902 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 903 #else 904 sum += (*v)*(*v); v++; 905 #endif 906 } 907 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 908 *nrm = sqrt(*nrm); 909 ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr); 910 } else if (type == NORM_1) { 911 PetscReal *tmp,*tmp2; 912 ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 913 tmp2 = tmp + A->cmap.N; 914 ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 915 *nrm = 0.0; 916 v = mat->v; 917 for (j=0; j<mdn->A->cmap.n; j++) { 918 for (i=0; i<mdn->A->rmap.n; i++) { 919 tmp[j] += PetscAbsScalar(*v); v++; 920 } 921 } 922 ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 923 for (j=0; j<A->cmap.N; j++) { 924 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 925 } 926 ierr = PetscFree(tmp);CHKERRQ(ierr); 927 ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr); 928 } else if (type == NORM_INFINITY) { /* max row norm */ 929 PetscReal ntemp; 930 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 931 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 932 } else { 933 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 934 } 935 } 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatTranspose_MPIDense" 941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 942 { 943 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 944 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 945 Mat B; 946 PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart; 947 PetscErrorCode ierr; 948 PetscInt j,i; 949 PetscScalar *v; 950 951 PetscFunctionBegin; 952 if (A == *matout && M != N) { 953 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 954 } 955 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 956 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 957 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 958 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 959 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 960 } else { 961 B = *matout; 962 } 963 964 m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v; 965 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 966 for (i=0; i<m; i++) rwork[i] = rstart + i; 967 for (j=0; j<n; j++) { 968 ierr = 
MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 969 v += m; 970 } 971 ierr = PetscFree(rwork);CHKERRQ(ierr); 972 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 974 if (*matout != A) { 975 *matout = B; 976 } else { 977 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 #include "petscblaslapack.h" 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatScale_MPIDense" 985 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 986 { 987 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 988 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 989 PetscScalar oalpha = alpha; 990 PetscErrorCode ierr; 991 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N); 992 993 PetscFunctionBegin; 994 BLASscal_(&nz,&oalpha,a->v,&one); 995 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 999 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1003 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1004 { 1005 PetscErrorCode ierr; 1006 1007 PetscFunctionBegin; 1008 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1014 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1015 { 1016 PetscErrorCode ierr; 1017 PetscInt m=A->rmap.n,n=B->cmap.n; 1018 Mat Cmat; 1019 1020 PetscFunctionBegin; 1021 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 1022 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1023 ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr); 1024 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1025 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 1026 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1027 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1028 *C = Cmat; 1029 PetscFunctionReturn(0); 1030 } 1031 1032 #if defined(PETSC_HAVE_PLAPACK) 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "MatSolve_MPIDense" 1035 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1036 { 1037 MPI_Comm comm = ((PetscObject)A)->comm,dummy_comm; 1038 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1039 PetscErrorCode ierr; 1040 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1041 PetscScalar *array; 1042 PetscReal one = 1.0; 1043 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1044 PLA_Obj v_pla = NULL; 1045 PetscScalar *loc_buf; 1046 Vec loc_x; 1047 1048 PetscFunctionBegin; 1049 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1050 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1051 1052 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1053 ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr); 1054 ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr); 1055 1056 /* Copy b into rhs_pla */ 1057 ierr = PLA_API_begin();CHKERRQ(ierr); 1058 ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr); 1059 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1060 ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1062 ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr); 1063 ierr = PLA_API_end();CHKERRQ(ierr); 1064 1065 if (A->factor == 
FACTOR_LU){ 1066 /* Apply the permutations to the right hand sides */ 1067 ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr); 1068 1069 /* Solve L y = b, overwriting b with y */ 1070 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1071 1072 /* Solve U x = y (=b), overwriting b with x */ 1073 ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1074 } else { /* FACTOR_CHOLESKY */ 1075 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1076 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1077 } 1078 1079 /* Copy PLAPACK x into Petsc vector x */ 1080 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1081 ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr); 1082 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1083 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1084 if (!lu->pla_solved){ 1085 1086 ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr); 1087 ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr); 1088 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1089 1090 /* Create IS and cts for VecScatterring */ 1091 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1092 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1093 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1094 idx_petsc = idx_pla + loc_m; 1095 1096 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1097 for (i=0; i<loc_m; i+=lu->nb){ 1098 j = 0; 1099 while (j < lu->nb && i+j < loc_m){ 1100 idx_petsc[i+j] = rstart + j; j++; 1101 } 1102 rstart += size*lu->nb; 1103 } 1104 1105 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1106 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1108 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1109 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1110 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1111 } 1112 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 1115 /* Free data */ 1116 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1117 ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr); 1118 1119 lu->pla_solved = PETSC_TRUE; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1125 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1126 { 1127 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1128 PetscErrorCode ierr; 1129 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1130 PetscInt info_pla=0; 1131 PetscScalar *array,one = 1.0; 1132 1133 PetscFunctionBegin; 1134 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1135 1136 /* Copy A into lu->A */ 1137 ierr = PLA_API_begin();CHKERRQ(ierr); 1138 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1139 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1140 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1141 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1142 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1143 ierr = 
PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1144 ierr = PLA_API_end();CHKERRQ(ierr); 1145 1146 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1147 info_pla = PLA_LU(lu->A,lu->pivots); 1148 if (info_pla != 0) 1149 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1150 1151 lu->CleanUpPlapack = PETSC_TRUE; 1152 lu->rstart = rstart; 1153 1154 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1160 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1161 { 1162 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1163 PetscErrorCode ierr; 1164 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1165 PetscInt info_pla=0; 1166 PetscScalar *array,one = 1.0; 1167 1168 PetscFunctionBegin; 1169 1170 /* Copy A into lu->A */ 1171 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1172 ierr = PLA_API_begin();CHKERRQ(ierr); 1173 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1175 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1176 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1177 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1178 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1179 ierr = PLA_API_end();CHKERRQ(ierr); 1180 1181 /* Factor P A -> Chol */ 1182 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1183 if (info_pla != 0) 1184 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1185 1186 lu->CleanUpPlapack = PETSC_TRUE; 1187 lu->rstart = rstart; 1188 1189 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private" 1195 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1196 { 1197 Mat B; 1198 Mat_Plapack *lu; 1199 PetscErrorCode ierr; 1200 PetscInt M=A->rmap.N,N=A->cmap.N; 1201 MPI_Comm comm=((PetscObject)A)->comm; 1202 PetscMPIInt size; 1203 PetscInt ierror; 1204 1205 PetscFunctionBegin; 1206 /* Create the factorization matrix */ 1207 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1208 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 1209 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1210 1211 lu = (Mat_Plapack*)(B->spptr); 1212 1213 /* Set default Plapack parameters */ 1214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1215 ierror = 0; 1216 lu->nb = M/size; 1217 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1218 1219 /* Set runtime options */ 1220 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1221 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 1222 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 1223 1224 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1225 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr); 1226 if (ierror){ 1227 ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE 
);CHKERRQ(ierr); 1228 } else { 1229 ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 1230 } 1231 lu->ierror = ierror; 1232 1233 lu->nb_alg = 0; 1234 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr); 1235 if (lu->nb_alg){ 1236 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr); 1237 } 1238 PetscOptionsEnd(); 1239 1240 1241 /* Create object distribution template */ 1242 lu->templ = NULL; 1243 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1244 1245 /* Use suggested nb_alg if it is not provided by user */ 1246 if (lu->nb_alg == 0){ 1247 ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr); 1248 ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr); 1249 } 1250 1251 /* Set the datatype */ 1252 #if defined(PETSC_USE_COMPLEX) 1253 lu->datatype = MPI_DOUBLE_COMPLEX; 1254 #else 1255 lu->datatype = MPI_DOUBLE; 1256 #endif 1257 1258 ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1259 1260 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1261 lu->CleanUpPlapack = PETSC_TRUE; 1262 *F = B; 1263 PetscFunctionReturn(0); 1264 } 1265 1266 /* Note the Petsc r and c permutations are ignored */ 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1269 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1270 { 1271 PetscErrorCode ierr; 1272 PetscInt M = A->rmap.N; 1273 Mat_Plapack *lu; 1274 1275 PetscFunctionBegin; 1276 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1277 lu = (Mat_Plapack*)(*F)->spptr; 1278 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1279 (*F)->factor = FACTOR_LU; 1280 PetscFunctionReturn(0); 1281 } 1282 1283 /* Note the Petsc perm permutation is ignored */ 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1286 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1287 { 1288 PetscErrorCode ierr; 1289 PetscTruth issymmetric,set; 1290 1291 PetscFunctionBegin; 1292 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1293 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1294 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1295 (*F)->factor = FACTOR_CHOLESKY; 1296 PetscFunctionReturn(0); 1297 } 1298 #endif 1299 1300 /* -------------------------------------------------------------------*/ 1301 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1302 MatGetRow_MPIDense, 1303 MatRestoreRow_MPIDense, 1304 MatMult_MPIDense, 1305 /* 4*/ MatMultAdd_MPIDense, 1306 MatMultTranspose_MPIDense, 1307 MatMultTransposeAdd_MPIDense, 1308 #if defined(PETSC_HAVE_PLAPACK) 1309 MatSolve_MPIDense, 1310 #else 1311 0, 1312 #endif 1313 0, 1314 0, 1315 /*10*/ 0, 1316 0, 1317 0, 1318 0, 1319 MatTranspose_MPIDense, 1320 /*15*/ MatGetInfo_MPIDense, 1321 MatEqual_MPIDense, 1322 MatGetDiagonal_MPIDense, 1323 MatDiagonalScale_MPIDense, 1324 MatNorm_MPIDense, 1325 /*20*/ MatAssemblyBegin_MPIDense, 1326 MatAssemblyEnd_MPIDense, 1327 0, 1328 MatSetOption_MPIDense, 1329 MatZeroEntries_MPIDense, 1330 /*25*/ MatZeroRows_MPIDense, 1331 #if defined(PETSC_HAVE_PLAPACK) 1332 MatLUFactorSymbolic_MPIDense, 1333 MatLUFactorNumeric_MPIDense, 1334 
MatCholeskyFactorSymbolic_MPIDense, 1335 MatCholeskyFactorNumeric_MPIDense, 1336 #else 1337 0, 1338 0, 1339 0, 1340 0, 1341 #endif 1342 /*30*/ MatSetUpPreallocation_MPIDense, 1343 0, 1344 0, 1345 MatGetArray_MPIDense, 1346 MatRestoreArray_MPIDense, 1347 /*35*/ MatDuplicate_MPIDense, 1348 0, 1349 0, 1350 0, 1351 0, 1352 /*40*/ 0, 1353 MatGetSubMatrices_MPIDense, 1354 0, 1355 MatGetValues_MPIDense, 1356 0, 1357 /*45*/ 0, 1358 MatScale_MPIDense, 1359 0, 1360 0, 1361 0, 1362 /*50*/ 0, 1363 0, 1364 0, 1365 0, 1366 0, 1367 /*55*/ 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /*60*/ MatGetSubMatrix_MPIDense, 1373 MatDestroy_MPIDense, 1374 MatView_MPIDense, 1375 0, 1376 0, 1377 /*65*/ 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 /*70*/ 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 /*75*/ 0, 1388 0, 1389 0, 1390 0, 1391 0, 1392 /*80*/ 0, 1393 0, 1394 0, 1395 0, 1396 /*84*/ MatLoad_MPIDense, 1397 0, 1398 0, 1399 0, 1400 0, 1401 0, 1402 /*90*/ 0, 1403 MatMatMultSymbolic_MPIDense_MPIDense, 1404 0, 1405 0, 1406 0, 1407 /*95*/ 0, 1408 0, 1409 0, 1410 0}; 1411 1412 EXTERN_C_BEGIN 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1415 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1416 { 1417 Mat_MPIDense *a; 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 mat->preallocated = PETSC_TRUE; 1422 /* Note: For now, when data is specified above, this assumes the user correctly 1423 allocates the local dense storage space. We should add error checking. */ 1424 1425 a = (Mat_MPIDense*)mat->data; 1426 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1427 ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr); 1428 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1429 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1430 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 EXTERN_C_END 1434 1435 EXTERN_C_BEGIN 1436 #if defined(PETSC_HAVE_PLAPACK) 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK" 1439 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type) 1440 { 1441 PetscFunctionBegin; 1442 /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */ 1443 PetscFunctionReturn(0); 1444 } 1445 #endif 1446 EXTERN_C_END 1447 1448 /*MC 1449 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1450 1451 Options Database Keys: 1452 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1453 1454 Level: beginner 1455 1456 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1457 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1458 (run config/configure.py with the option --download-plapack) 1459 1460 1461 Options Database Keys: 1462 . -mat_plapack_nprows <n> - number of rows in processor partition 1463 . -mat_plapack_npcols <n> - number of columns in processor partition 1464 . -mat_plapack_nb <n> - block size of template vector 1465 . 
-mat_plapack_nb_alg <n> - algorithmic block size 1466 - -mat_plapack_ckerror <n> - error checking flag 1467 1468 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1469 M*/ 1470 1471 EXTERN_C_BEGIN 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatCreate_MPIDense" 1474 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1475 { 1476 Mat_MPIDense *a; 1477 PetscErrorCode ierr; 1478 #if defined(PETSC_HAVE_PLAPACK) 1479 Mat_Plapack *lu; 1480 #endif 1481 1482 PetscFunctionBegin; 1483 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1484 mat->data = (void*)a; 1485 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1486 mat->factor = 0; 1487 mat->mapping = 0; 1488 1489 mat->insertmode = NOT_SET_VALUES; 1490 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1491 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1492 1493 mat->rmap.bs = mat->cmap.bs = 1; 1494 ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr); 1495 ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr); 1496 a->nvec = mat->cmap.n; 1497 1498 /* build cache for off array entries formed */ 1499 a->donotstash = PETSC_FALSE; 1500 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1501 1502 /* stuff used for matrix vector multiply */ 1503 a->lvec = 0; 1504 a->Mvctx = 0; 1505 a->roworiented = PETSC_TRUE; 1506 1507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1508 "MatGetDiagonalBlock_MPIDense", 1509 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1511 "MatMPIDenseSetPreallocation_MPIDense", 1512 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1514 "MatMatMult_MPIAIJ_MPIDense", 1515 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1516 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1517 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1518 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1520 "MatMatMultNumeric_MPIAIJ_MPIDense", 1521 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1522 #if defined(PETSC_HAVE_PLAPACK) 1523 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C", 1524 "MatSetSolverType_MPIDense_PLAPACK", 1525 MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr); 1526 #endif 1527 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1528 1529 #if defined(PETSC_HAVE_PLAPACK) 1530 ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr); 1531 lu->CleanUpPlapack = PETSC_FALSE; 1532 mat->spptr = (void*)lu; 1533 #endif 1534 PetscFunctionReturn(0); 1535 } 1536 EXTERN_C_END 1537 1538 /*MC 1539 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1540 1541 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1542 and MATMPIDENSE otherwise. 1543 1544 Options Database Keys: 1545 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
         to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
         for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
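
   Example usage (a minimal sketch; the 10 x 10 size and the values set are illustrative only):
.vb
      Mat            A;
      PetscInt       i,rstart,rend;
      PetscScalar    one = 1.0;
      PetscErrorCode ierr;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve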

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
         to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
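
/*
   Usage sketch for MatCreateMPIDense() (illustrative only, not part of the original
   source): the global dimensions are given, the local split is left to PETSc, and
   data = PETSC_NULL lets PETSc allocate the entries.  The 100 x 100 size is an
   arbitrary example value.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ...  fill in entries with MatSetValues(), then  ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/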

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot directly read into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' rows and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot directly read into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  /* return the array obtained with MatGetArray() above */
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
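
/*
   Note on the index arithmetic above (added commentary, not original source): the file
   stores each local block row by row, while the storage obtained from MatGetArray() is
   column major (Fortran order) with local leading dimension m.  Entry (i,j) of the local
   block, read consecutively from the row-oriented buffer, therefore goes to array[i + j*m].
   For example, with m = 2 local rows and N = 3 columns the six values read from the file
   land at offsets 0, 2, 4 (row 0) and 1, 3, 5 (row 1).
*/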

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
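
/*
   Loading sketch (illustrative only, not part of the original source): MatLoad_MPIDense()
   is normally reached through the public MatLoad() interface with a binary viewer.  The
   file name "matrix.dat" is a placeholder; PETSc is assumed to be initialized.

     PetscViewer    viewer;
     Mat            A;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/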

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
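
/*
   Usage sketch (illustrative only, not part of the original source): MatEqual() is
   collective, so every process in the matrices' communicator must call it; the local
   comparisons above are combined with MPI_Allreduce().

     PetscTruth     flg;
     PetscErrorCode ierr;

     ierr = MatEqual(A,B,&flg);CHKERRQ(ierr);
     if (flg) {
       ...  the parallel matrices A and B have identical entries  ...
     }
*/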

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
  when using static libraries.

  Input Parameter:
.  path - The dynamic library path, or PETSC_NULL

  Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif