/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *nprocs,j,idx,nsends;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                                                (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense

M*/
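
/*
   A minimal usage sketch (added for illustration; not part of the original
   source): selecting this type at runtime through the options database, as the
   manual-page entry above describes.  The communicator "comm" and the sizes
   m, n, M, N are placeholders supplied by the caller.

      Mat mat;
      int ierr;
      ierr = MatCreate(comm,m,n,M,N,&mat);CHKERRQ(ierr);
      ierr = MatSetFromOptions(mat);CHKERRQ(ierr);    picks up -mat_type mpidense
      ...
      ierr = MatDestroy(mat);CHKERRQ(ierr);
*/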

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr         = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data    = (void*)a;
  ierr         = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
int MatCreate_Dense(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  int ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
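
/*
   A minimal usage sketch (added for illustration; not part of the original
   source).  Here comm, m, n, M, and N are placeholders chosen by the caller;
   passing PETSC_NULL lets PETSc allocate the local column-major storage itself,
   as described in the manual page above.

      Mat mat;
      int ierr;
      ierr = MatCreate(comm,m,n,M,N,&mat);CHKERRQ(ierr);
      ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(mat,PETSC_NULL);CHKERRQ(ierr);
*/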

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
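
/*
   A minimal usage sketch (added for illustration; not part of the original
   source): each process creates the matrix, inserts whatever entries it
   computes (entries for rows owned by other processes are stashed and
   communicated during assembly), and then assembles.  M and N are the global
   sizes chosen by the caller; row, col, and value below are illustrative.

      Mat         C;
      int         ierr,row = 0,col = 0;
      PetscScalar value = 1.0;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&C);CHKERRQ(ierr);
      ierr = MatSetValues(C,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/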

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix, row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
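
/*
   Note added for orientation (not part of the original source): as implied by
   the reads performed in MatLoad_MPIDense() above, the binary file begins with
   a header of four ints (MAT_FILE_COOKIE, M, N, nz).  When nz equals
   MATRIX_BINARY_FORMAT_DENSE the values follow immediately in row-major order
   and are handled by MatLoad_MPIDense_DenseInFile(); otherwise the header is
   followed by M row lengths, then all column indices, then all numerical
   values, which MatLoad_MPIDense() distributes and inserts row by row.
*/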