/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

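  /* the local rows are stored contiguously (column major) in the underlying
     sequential dense matrix, so its array doubles as this matrix's array */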
  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each
     index set to confirm that it is OK. ... Currently supports only submatrices
     with the same row partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
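  /* Drain the stash: each incoming message carries (row,col,value) triples;
     runs of entries that share a row are batched into one MatSetValues call */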
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      ncols = j-i; /* covers both the row-change and the end-of-message case */
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* the +1 protects against a zero-length malloc */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into one local list */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
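  /* each received index is global; subtracting base (this process's first
     global row) converts it to a local row number */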
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
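  /* the local array is stored column major, so the diagonal entry of global
     row rstart+i (local row i, global column rstart+i) lives at offset
     radd + i*m + i, with radd = rstart*m */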
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                                                (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor.
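       Only the first process then views its local copy; the other
       processes participate through the collective singleton-viewer calls.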
    */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
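    /* each column of the column-major local array is contiguous, so the
       right scaling above runs with unit stride; the left scaling loop
       instead strides by m to walk along a row */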
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v     = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  /* the global row numbers of the local rows; the same for every column */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the column-major local block becomes row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
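
/* MatDuplicate_MPIDense is declared above so that it can be entered in the
   MatOps_Values function table below before it is defined */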
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below;
     eventually we should remove the duplicate information that is not
     contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-processor entries */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;
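
  /* register the type-specific routine so that MatGetDiagonalBlock() can
     dispatch to it through function composition */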
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes that the user
     correctly allocates the local dense storage space.  We should add
     error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
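
   Example usage (a minimal sketch; PETSc chooses the layout from the global
   sizes, and entries would then be set with MatSetValues()):
.vb
      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);
.ve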

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  /* copy both ownership tables; cowners follows rowners in one allocation,
     mirroring the layout set up in MatCreate_MPIDense() */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
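    /* the file stores the values row by row, while the local array is
       column major, so the copy above also performs the transposition */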

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)viewer)->tag;
  int         i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END