/*$Id: mpidense.c,v 1.144 2000/09/28 21:10:58 bsmith Exp bsmith $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
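
/*
   Usage note (a minimal sketch, not part of the original source): values set with
   MatSetValues() for rows owned by another process are kept in the stash until
   MatAssemblyBegin()/MatAssemblyEnd() (implemented below as
   MatAssemblyBegin_MPIDense()/MatAssemblyEnd_MPIDense()) communicate them.  The
   matrix A and the global indices used here are hypothetical placeholders.

     Mat    A;                          previously created MPIDense matrix
     Scalar one = 1.0;
     int    ierr,row = 0,col = 5;       row may be owned by another process
     ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/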

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently the submatrix must use the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
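
/*
   Usage note (a hedged sketch, not part of the original source): MatMult() on an
   MPIDense matrix scatters the input vector into the local work vector lvec (set up
   by MatSetUpMultiply_MPIDense(), not shown here) and then calls the sequential
   kernel on the local m x N block, as coded above.  The names A, m, n, M, N and the
   communicator are placeholders for a previously assembled M x N matrix; x has local
   length n and global length N, y has local length m and global length M.

     Vec x,y;
     int ierr;
     ierr = VecCreateMPI(PETSC_COMM_WORLD,n,N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);
*/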

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,format,size = mdn->size,rank = mdn->rank;
  ViewerType   vtype;
  PetscTruth   isascii,isdraw;
  Viewer       sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;

    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat    A;
    int    M = mat->M,N = mat->N,m,row,i,nz,*cols;
    Scalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn  = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = PETSC_TRUE;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
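
/*
   Usage note (a minimal sketch, not part of the original source): rows can only be
   accessed on the process that owns them, so a typical traversal queries the
   ownership range first.  The matrix A below is a placeholder for a previously
   assembled MPIDense matrix.

     int    ierr,rstart,rend,row,nz,*cols;
     Scalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/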

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      tmp  = (PetscReal*)PetscMalloc(2*A->N*sizeof(PetscReal));CHKPTRQ(tmp);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  rwork = (int*)PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNC__
#define __FUNC__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  mat->data = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-processor entries formed during MatSetValues() */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
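
/*
   Usage note (a hedged sketch, not part of the original source): the local storage
   created above is a single column-major array of size m x N, so a caller may supply
   its own buffer of m*N entries.  The sequence below mirrors what MatCreateMPIDense()
   does internally; comm, m, n, M, N and mydata are placeholders.

     Mat    A;
     Scalar *mydata;
     int    ierr;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/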

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
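
/*
   Usage note (a minimal sketch, not part of the original source): a typical call lets
   PETSc choose the row distribution and allocate the storage; the global sizes M and N
   are placeholders, and the entries would then be set with MatSetValues() and the
   matrix assembled before use.

     Mat A;
     int ierr;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/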

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  if (oldmat->factor) {
    a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
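
/*
   Usage note (a hedged sketch, not part of the original source): MatLoad_MPIDense() is
   normally reached through MatLoad() on a binary viewer.  The file name and the
   viewer-opening call shown here are assumptions for illustration only and are not
   taken from this source.

     Mat    A;
     Viewer fview;
     int    ierr;
     ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&fview);CHKERRQ(ierr);
     ierr = MatLoad(fview,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = ViewerDestroy(fview);CHKERRQ(ierr);
*/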