#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that represents the operator. For sequential matrices it returns the matrix itself.

   Input Parameter:
.  A - the sequential or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) { /* this type check is fragile: MATDENSE is only a wrapper on one-process communicators */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
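/*
   Minimal usage sketch for MatDenseGetLocalMatrix() (illustrative only, not part of the
   original source; the variable names "global" and "local" are assumptions):

      Mat global,local;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&global);CHKERRQ(ierr);
      ierr = MatDenseGetLocalMatrix(global,&local);CHKERRQ(ierr);
      ... operate on the per-process dense block through "local" ...
*/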

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently the submatrix must use the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap.n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        v    = vv;
        for (k=1; k<size; k++) {
          m    = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->rmap.n,n = mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (A == *matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (*matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,n = B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       0,
       MatCholeskyFactorSymbolic_MPIDense,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
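/*
   Illustrative sketch (not part of the original source) of the usual way a MATMPIDENSE
   matrix is obtained; "A", "M", "N", "row", "col" and "value" are placeholder names:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);                 or -mat_type mpidense with MatSetFromOptions()
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/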

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
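/*
   Minimal sketch (not from the original source) of supplying user-owned storage through
   MatMPIDenseSetPreallocation(); the column-major array "data" of size (local rows) x
   (global columns), and the names "A", "m" and "N", are assumed, illustrative variables:

      PetscScalar *data;
      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/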

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"
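/*
   The routines below implement MatLoad() for this matrix type. Illustrative usage sketch
   (not part of the original source; "matrix.dat" and the variable names are assumptions):

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/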

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts belonging to the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts belonging to the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}