/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
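/*
   Usage sketch (an illustration, not part of this file): the routine above is
   normally reached through the generic MatGetDiagonalBlock() interface, which
   dispatches to the "MatGetDiagonalBlock_C" function composed onto the matrix
   in MatCreate_MPIDense() below.  Assuming that interface, a hypothetical
   caller might look like:

     Mat        Adiag;
     PetscTruth iscopy;
     ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&Adiag);CHKERRQ(ierr);
     ... use the sequential dense block Adiag on this process ...
     ierr = MatDestroy(Adiag);CHKERRQ(ierr);
*/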
EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
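/*
   Usage sketch (an illustration, not part of this file): values set for rows
   owned by other processes are cached in mat->stash by MatSetValues_MPIDense()
   and are only communicated and applied during the assembly phase above.  A
   typical caller therefore always pairs value setting with assembly:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&value,INSERT_VALUES);CHKERRQ(ierr);
     ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where grow/gcol are global indices that may refer to off-process rows
   (unless MAT_IGNORE_OFF_PROC_ENTRIES has been set).
*/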
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr      = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr      = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                                                (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn  = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
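/*
   Usage sketch (an illustration, not part of this file): MatGetRow_MPIDense()
   above only supports locally owned rows, so a caller first queries the
   ownership range and then walks its own rows, restoring each one after use:

     int         rstart,rend,row,nz,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... examine the nz entries of this row ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/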
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v     = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* rwork holds the m global row indices of this process's local block */
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
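/*
   Usage sketch (an illustration, not part of this file): this routine is the
   preallocation step of the two-stage creation path, mirroring what
   MatCreateMPIDense() below does internally:

     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   Passing PETSC_NULL lets PETSc allocate the local column-major storage; a
   Fortran caller may instead pass its own array of size m*N.
*/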
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
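/*
   Usage sketch (an illustration, not part of this file): the convenience
   routine above creates either an MPI or a sequential dense matrix depending
   on the communicator size, after which values are set and the matrix is
   assembled in the usual way:

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ... MatSetValues(A,...) for any rows and columns ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ... use A ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/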
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
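/*
   Usage sketch (an illustration, not part of this file; the binary-viewer
   open-mode constant is an assumption and its exact name varies between PETSc
   versions): MatLoad_MPIDense() above is reached through the generic MatLoad()
   interface with a binary viewer, e.g.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/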