#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

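/*
   Illustrative sketch (not part of the library): given an assembled MATMPIDENSE
   matrix A and a PetscErrorCode ierr, a caller can reach the per-process
   MATSEQDENSE block and its raw column-major array with MatDenseGetLocalMatrix().
   Error checking is abbreviated.

     Mat         Aloc;
     PetscScalar *array;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);
     ... array holds A->rmap.n rows by A->cmap.N columns, stored by columns ...
     ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/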
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

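/*
   Illustrative sketch (not part of the library): values set for rows owned by
   another process are stashed locally above and only moved to their owners in
   MatAssemblyBegin/End(), so an assembly pair must follow the insertion calls.
   The indices and values are made up; error checking is abbreviated.

     PetscInt    row = 0;
     PetscInt    cols[2] = {0,1};
     PetscScalar vals[2] = {1.0,2.0};

     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/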
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrices with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

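/*
   Illustrative sketch (not part of the library): the local storage manipulated
   in MatGetSubMatrix_MPIDense() and returned by MatGetArray() is one
   column-major array with A->rmap.n rows and A->cmap.N columns, so the locally
   owned entry (i,j) of the global matrix lives at array[(i-rstart) + j*mlocal].
   Error checking is abbreviated.

     PetscInt    rstart,rend,mlocal,i,j,N;
     PetscScalar *array;

     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     mlocal = rend - rstart;
     i      = rstart;                   any locally owned global row
     ierr = MatGetArray(A,&array);CHKERRQ(ierr);
     for (j=0; j<N; j++) array[(i - rstart) + j*mlocal] += 1.0;
     ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/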
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

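/*
   Illustrative sketch (not part of the library): MatZeroRows() accepts global
   row numbers from any process; the communication above routes each index to
   its owner before the local rows are zeroed and the diagonal entry set.  The
   rows chosen here are made up; error checking is abbreviated.

     PetscInt    rows[2] = {0,5};
     PetscScalar diag    = 1.0;

     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/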
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

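/*
   Illustrative sketch (not part of the library): MatMult() above scatters the
   whole input vector into mdn->lvec on every process and then applies the local
   row block, so x must use the matrix column layout and y the row layout.
   MatGetVecs() supplies compatible vectors; error checking is abbreviated.

     Vec x,y;

     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(x);CHKERRQ(ierr);
     ierr = VecDestroy(y);CHKERRQ(ierr);
*/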
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu->CleanUpPlapack) {
    PLA_Obj_free(&lu->A);
    PLA_Obj_free(&lu->pivots);
    PLA_Temp_free(&lu->templ);
    PLA_Finalize();

    ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
    ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
    ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap.n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        v    = vv;
        for (k=1; k<size; k++) {
          m    = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

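/*
   Illustrative sketch (not part of the library): the binary path above is only
   taken for the native format, which writes the matrix as a flat dense array
   behind a MAT_FILE_COOKIE header.  The file name is made up; error checking is
   abbreviated.

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/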
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",lu->nprows,lu->npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

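/*
   Illustrative sketch (not part of the library): two of the options handled
   above in a typical call sequence.  MAT_IGNORE_OFF_PROC_ENTRIES makes
   MatSetValues() drop values destined for other processes instead of stashing
   them, and MAT_ROW_ORIENTED controls how the block of values passed to
   MatSetValues() is interpreted.  Error checking is abbreviated.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
*/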
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

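/*
   Illustrative sketch (not part of the library): the norms supported above and
   how they are reduced across processes (Frobenius and 1-norm with MPI_SUM,
   infinity-norm with MPI_MAX); NORM_2 is not available for this type.  Error
   checking is abbreviated.

     PetscReal fnorm,onenorm,infnorm;

     ierr = MatNorm(A,NORM_FROBENIUS,&fnorm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&onenorm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&infnorm);CHKERRQ(ierr);
*/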
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (A == *matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (*matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

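/*
   Illustrative sketch (not part of the library): the symbolic routine above
   only creates C with the row layout of A and the column layout of B; the
   product itself is reached through the MatMatMult() interface, for example
   with the MPIAIJ * MPIDense combination composed in MatCreate_MPIDense()
   below.  The fill argument is unused for a dense result; error checking is
   abbreviated.

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/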
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into rhs_pla */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factor == FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows(v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla);

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv(PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
  } else { /* FACTOR_CHOLESKY */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv(PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
             PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into Petsc vector x */
  PLA_Obj_local_length(v_pla,&loc_m);
  PLA_Obj_local_buffer(v_pla,(void**)&loc_buf);
  PLA_Obj_local_stride(v_pla,&loc_stride);
  /*
  PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
  */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);
    /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */

    /* Create the IS and ctx for the VecScatter */
    PLA_Obj_local_length(v_pla,&loc_m);
    PLA_Obj_local_stride(v_pla,&loc_stride);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free(&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M,&one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;

  (*F)->assembled = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M,&one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor A -> L L^T (Cholesky) */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;

  (*F)->assembled = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat            B;
  Mat_Plapack    *lu;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,N=A->cmap.N;
  MPI_Comm       comm=((PetscObject)A)->comm,comm_2d;
  PetscMPIInt    size;
  PetscInt       ierror;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);

  lu = (Mat_Plapack*)(B->spptr);

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  lu->nprows = 1; lu->npcols = size;
  ierror     = 0;
  lu->nb     = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
  if (ierror){
    PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);
  } else {
    PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);
  }
  lu->ierror = ierror;

  lu->nb_alg = 0;
  ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
  if (lu->nb_alg){
    pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);
  }
  PetscOptionsEnd();

  /* Create a 2D communicator */
  PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);
  lu->comm_2d = comm_2d;

  /* Initialize PLAPACK */
  PLA_Init(comm_2d);

  /* Create object distribution template */
  lu->templ = NULL;
  PLA_Temp_create(lu->nb,0,&lu->templ);

  /* Use suggested nb_alg if it is not provided by user */
  if (lu->nb_alg == 0){
    PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);
    pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);
  }

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
  lu->CleanUpPlapack = PETSC_TRUE;
  *F = B;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  (*F)->factor = FACTOR_LU;
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  (*F)->factor = FACTOR_CHOLESKY;
  PetscFunctionReturn(0);
}
#endif

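/*
   Illustrative sketch (not part of the library): with PLAPACK enabled the
   factorizations above are normally reached through the usual KSP/PC path on a
   MATMPIDENSE operator, and the -mat_plapack_* options registered in
   MatFactorSymbolic_MPIDense_Private() tune the process mesh and block sizes.
   The executable name and values are made up.

     mpirun -np 4 ./ex1 -ksp_type preonly -pc_type lu \
            -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_nb 64
*/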
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatSolve_MPIDense,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
#else
       0,
       0,
       0,
       0,
#endif
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
{
  PetscFunctionBegin;
  /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */
  PetscFunctionReturn(0);
}
#endif
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

   MATMPIDENSE matrices may use direct solvers (LU and Cholesky) for parallel dense matrices
   via the external package PLAPACK, if PLAPACK is installed (run config/configure.py with
   the option --download-plapack).

   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu;
#endif

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                     MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                     MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                     MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
                                     "MatSetSolverType_MPIDense_PLAPACK",
                                     MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
  lu->CleanUpPlapack = PETSC_FALSE;
  mat->spptr         = (void*)lu;
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

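/*
   Illustrative sketch (not part of the library): typical creation of a
   distributed dense matrix with PETSc-managed storage, letting PETSc choose the
   local row and column sizes.  The global sizes are made up; error checking is
   abbreviated.

     Mat A;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin/End(), use A ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/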
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
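/*
   Usage sketch (illustrative only): create a distributed dense matrix, fill the locally
   owned rows, and assemble it.  The global sizes M and N and the inserted values are
   placeholders; every routine used here appears elsewhere in this file.

     Mat      A;
     PetscInt i,j,rstart,rend;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         PetscScalar v = 1.0;   /* value to insert at global entry (i,j) */
         ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/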
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive my numeric values from process 0 */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
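/*
   Usage sketch (illustrative only): how an application would typically reach the loaders
   in this file, by reading a PETSc binary file through a viewer.  The file name is a
   placeholder and the exact viewer calls may differ between PETSc versions.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/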
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* handle the case where the matrix is stored on disk as a dense matrix */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A    = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive my numeric values from process 0 */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
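/*
   Note (summary of the on-disk layout, as implied by the reads above): the binary file begins
   with a four-entry integer header (MAT_FILE_COOKIE, M, N, nz).  When nz equals
   MATRIX_BINARY_FORMAT_DENSE, the header is followed directly by M*N scalar values stored
   row by row and the dense loader above is used.  Otherwise the header is followed by M row
   lengths, then the column indices of every row in order, then the numerical values in the
   same order; process 0 reads these in contiguous per-process chunks, keeps its own, and
   ships each remaining chunk to its owner.
*/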
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
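/*
   Usage sketch (illustrative only): the routine above is reached through the generic MatEqual()
   interface, which compares the local blocks and reduces the answer across the communicator.
   X and Y are placeholder matrices of the same layout.

     PetscTruth equal;
     ierr = MatEqual(X,Y,&equal);CHKERRQ(ierr);
     if (!equal) SETERRQ(PETSC_ERR_ARG_WRONG,"matrices do not match");
*/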