/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
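
/*
   Note: as the error check above enforces, MatGetValues() on a MATMPIDENSE matrix may only
   reference rows owned by the calling process.  A minimal sketch of a safe query (assumes
   global indices row and col, plus ierr, rstart, rend, and a PetscScalar value, are declared
   by the caller):

       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       if (row >= rstart && row < rend) {
         ierr = MatGetValues(mat,1,&row,1,&col,&value);CHKERRQ(ierr);
       }
*/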

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
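
/*
   MatSetValues_MPIDense() stashes off-process entries in mat->stash; the assembly pair
   above/below then communicates and applies them.  A minimal sketch of the calling pattern
   a user follows (assumes mat is an assembled MATMPIDENSE matrix and row, col, value, and
   ierr are defined by the caller):

       ierr = MatSetValues(mat,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin_MPIDense() (above) checks that all processes use the same InsertMode and
   starts the stash scatter; MatAssemblyEnd_MPIDense() (below) receives the stashed values and
   inserts them into the local dense block.
*/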

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                  (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr); /* holds one global index per local row */
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense" 876 int MatSetUpPreallocation_MPIDense(Mat A) 877 { 878 int ierr; 879 880 PetscFunctionBegin; 881 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 885 /* -------------------------------------------------------------------*/ 886 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 887 MatGetRow_MPIDense, 888 MatRestoreRow_MPIDense, 889 MatMult_MPIDense, 890 MatMultAdd_MPIDense, 891 MatMultTranspose_MPIDense, 892 MatMultTransposeAdd_MPIDense, 893 0, 894 0, 895 0, 896 0, 897 0, 898 0, 899 0, 900 MatTranspose_MPIDense, 901 MatGetInfo_MPIDense,0, 902 MatGetDiagonal_MPIDense, 903 MatDiagonalScale_MPIDense, 904 MatNorm_MPIDense, 905 MatAssemblyBegin_MPIDense, 906 MatAssemblyEnd_MPIDense, 907 0, 908 MatSetOption_MPIDense, 909 MatZeroEntries_MPIDense, 910 MatZeroRows_MPIDense, 911 0, 912 0, 913 0, 914 0, 915 MatSetUpPreallocation_MPIDense, 916 0, 917 0, 918 MatGetArray_MPIDense, 919 MatRestoreArray_MPIDense, 920 MatDuplicate_MPIDense, 921 0, 922 0, 923 0, 924 0, 925 0, 926 MatGetSubMatrices_MPIDense, 927 0, 928 MatGetValues_MPIDense, 929 0, 930 0, 931 MatScale_MPIDense, 932 0, 933 0, 934 0, 935 MatGetBlockSize_MPIDense, 936 0, 937 0, 938 0, 939 0, 940 0, 941 0, 942 0, 943 0, 944 0, 945 MatGetSubMatrix_MPIDense, 946 MatDestroy_MPIDense, 947 MatView_MPIDense, 948 MatGetPetscMaps_Petsc}; 949 950 EXTERN_C_BEGIN 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatCreate_MPIDense" 953 int MatCreate_MPIDense(Mat mat) 954 { 955 Mat_MPIDense *a; 956 int ierr,i; 957 958 PetscFunctionBegin; 959 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 960 mat->data = (void*)a; 961 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 962 mat->factor = 0; 963 mat->mapping = 0; 964 965 a->factor = 0; 966 mat->insertmode = NOT_SET_VALUES; 967 ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr); 968 ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr); 969 970 ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr); 971 ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr); 972 a->nvec = mat->n; 973 974 /* the information in the maps duplicates the information computed below, eventually 975 we should remove the duplicate information that is not contained in the maps */ 976 ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr); 977 ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr); 978 979 /* build local table of row and column ownerships */ 980 ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 981 a->cowners = a->rowners + a->size + 1; 982 PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 983 ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 984 a->rowners[0] = 0; 985 for (i=2; i<=a->size; i++) { 986 a->rowners[i] += a->rowners[i-1]; 987 } 988 a->rstart = a->rowners[a->rank]; 989 a->rend = a->rowners[a->rank+1]; 990 ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 991 a->cowners[0] = 0; 992 for (i=2; i<=a->size; i++) { 993 a->cowners[i] += a->cowners[i-1]; 994 } 995 996 /* build cache for off array entries formed */ 997 a->donotstash = PETSC_FALSE; 998 ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr); 999 1000 /* stuff used for matrix vector multiply */ 1001 a->lvec = 0; 1002 a->Mvctx = 0; 1003 a->roworiented = PETSC_TRUE; 1004 1005 ierr = 
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
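
   Example of typical usage (a sketch only; assumes the global sizes M and N and an
   error-code variable ierr are already defined by the caller):
.vb
      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      /* ... set entries with MatSetValues(A,...) ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve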

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
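
/*
   Example of how MatLoad_MPIDense() is typically reached (a sketch only; assumes a binary
   PetscViewer `viewer` has already been opened for reading, e.g. with PetscViewerBinaryOpen(),
   and that ierr is declared by the caller):

       Mat A;
       ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);

   MatLoad() dispatches to the routine above, which handles both the usual sparse binary
   format and files written in the dense format (header[3] == MATRIX_BINARY_FORMAT_DENSE).
*/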