/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK; currently the submatrix must have the same row partitioning as
     the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* This code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *nprocs,j,idx,nsends;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr =
MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 306 } 307 308 /* do sends: 309 1) starts[i] gives the starting index in svalues for stuff going to 310 the ith processor 311 */ 312 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 313 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 314 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 315 starts[0] = 0; 316 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 317 for (i=0; i<N; i++) { 318 svalues[starts[owner[i]]++] = rows[i]; 319 } 320 ISRestoreIndices(is,&rows); 321 322 starts[0] = 0; 323 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 324 count = 0; 325 for (i=0; i<size; i++) { 326 if (nprocs[2*i+1]) { 327 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 328 } 329 } 330 ierr = PetscFree(starts);CHKERRQ(ierr); 331 332 base = owners[rank]; 333 334 /* wait on receives */ 335 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 336 source = lens + nrecvs; 337 count = nrecvs; slen = 0; 338 while (count) { 339 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 340 /* unpack receives into our local space */ 341 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 342 source[imdex] = recv_status.MPI_SOURCE; 343 lens[imdex] = n; 344 slen += n; 345 count--; 346 } 347 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 348 349 /* move the data into the send scatter */ 350 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 351 count = 0; 352 for (i=0; i<nrecvs; i++) { 353 values = rvalues + i*nmax; 354 for (j=0; j<lens[i]; j++) { 355 lrows[count++] = values[j] - base; 356 } 357 } 358 ierr = PetscFree(rvalues);CHKERRQ(ierr); 359 ierr = PetscFree(lens);CHKERRQ(ierr); 360 ierr = PetscFree(owner);CHKERRQ(ierr); 361 ierr = PetscFree(nprocs);CHKERRQ(ierr); 362 363 /* actually zap the local rows */ 364 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 365 PetscLogObjectParent(A,istmp); 366 ierr = PetscFree(lrows);CHKERRQ(ierr); 367 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 368 ierr = ISDestroy(istmp);CHKERRQ(ierr); 369 370 /* wait on sends */ 371 if (nsends) { 372 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 373 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 374 ierr = PetscFree(send_status);CHKERRQ(ierr); 375 } 376 ierr = PetscFree(send_waits);CHKERRQ(ierr); 377 ierr = PetscFree(svalues);CHKERRQ(ierr); 378 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatMult_MPIDense" 384 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 385 { 386 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 387 int ierr; 388 389 PetscFunctionBegin; 390 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 391 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 392 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatMultAdd_MPIDense" 398 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 399 { 400 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 401 int ierr; 402 403 PetscFunctionBegin; 404 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 405 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 406 ierr 
= MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatMultTranspose_MPIDense" 412 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 413 { 414 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 415 int ierr; 416 PetscScalar zero = 0.0; 417 418 PetscFunctionBegin; 419 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 420 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 421 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 422 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 428 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 429 { 430 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 431 int ierr; 432 433 PetscFunctionBegin; 434 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 435 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 436 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 437 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatGetDiagonal_MPIDense" 443 int MatGetDiagonal_MPIDense(Mat A,Vec v) 444 { 445 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 446 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 447 int ierr,len,i,n,m = A->m,radd; 448 PetscScalar *x,zero = 0.0; 449 450 PetscFunctionBegin; 451 ierr = VecSet(&zero,v);CHKERRQ(ierr); 452 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 453 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 454 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 455 len = PetscMin(a->A->m,a->A->n); 456 radd = a->rstart*m; 457 for (i=0; i<len; i++) { 458 x[i] = aloc->v[radd + i*m + i]; 459 } 460 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatDestroy_MPIDense" 466 int MatDestroy_MPIDense(Mat mat) 467 { 468 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 469 int ierr; 470 471 PetscFunctionBegin; 472 473 #if defined(PETSC_USE_LOG) 474 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 475 #endif 476 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 477 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 478 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 479 if (mdn->lvec) VecDestroy(mdn->lvec); 480 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 481 if (mdn->factor) { 482 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 483 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 484 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 485 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 486 } 487 ierr = PetscFree(mdn);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatView_MPIDense_Binary" 493 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 494 { 495 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 496 int ierr; 497 498 PetscFunctionBegin; 499 if (mdn->size == 1) { 500 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 501 } 502 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 508 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 509 { 510 Mat_MPIDense *mdn = 
(Mat_MPIDense*)mat->data; 511 int ierr,size = mdn->size,rank = mdn->rank; 512 PetscViewerType vtype; 513 PetscTruth isascii,isdraw; 514 PetscViewer sviewer; 515 PetscViewerFormat format; 516 517 PetscFunctionBegin; 518 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 519 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 520 if (isascii) { 521 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 522 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 523 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 524 MatInfo info; 525 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 526 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m, 527 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 528 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 529 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 530 PetscFunctionReturn(0); 531 } else if (format == PETSC_VIEWER_ASCII_INFO) { 532 PetscFunctionReturn(0); 533 } 534 } else if (isdraw) { 535 PetscDraw draw; 536 PetscTruth isnull; 537 538 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 539 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 540 if (isnull) PetscFunctionReturn(0); 541 } 542 543 if (size == 1) { 544 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 545 } else { 546 /* assemble the entire matrix onto first processor. */ 547 Mat A; 548 int M = mat->M,N = mat->N,m,row,i,nz,*cols; 549 PetscScalar *vals; 550 551 if (!rank) { 552 ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 553 } else { 554 ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 555 } 556 PetscLogObjectParent(mat,A); 557 558 /* Copy the matrix ... 
This isn't the most efficient means, 559 but it's quick for now */ 560 row = mdn->rstart; m = mdn->A->m; 561 for (i=0; i<m; i++) { 562 ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 563 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 564 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 565 row++; 566 } 567 568 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 569 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 571 if (!rank) { 572 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 573 } 574 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 575 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 576 ierr = MatDestroy(A);CHKERRQ(ierr); 577 } 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatView_MPIDense" 583 int MatView_MPIDense(Mat mat,PetscViewer viewer) 584 { 585 int ierr; 586 PetscTruth isascii,isbinary,isdraw,issocket; 587 588 PetscFunctionBegin; 589 590 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 591 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 592 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 594 595 if (isascii || issocket || isdraw) { 596 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 597 } else if (isbinary) { 598 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 599 } else { 600 SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 601 } 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatGetInfo_MPIDense" 607 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 608 { 609 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 610 Mat mdn = mat->A; 611 int ierr; 612 PetscReal isend[5],irecv[5]; 613 614 PetscFunctionBegin; 615 info->rows_global = (double)A->M; 616 info->columns_global = (double)A->N; 617 info->rows_local = (double)A->m; 618 info->columns_local = (double)A->N; 619 info->block_size = 1.0; 620 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 621 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 622 isend[3] = info->memory; isend[4] = info->mallocs; 623 if (flag == MAT_LOCAL) { 624 info->nz_used = isend[0]; 625 info->nz_allocated = isend[1]; 626 info->nz_unneeded = isend[2]; 627 info->memory = isend[3]; 628 info->mallocs = isend[4]; 629 } else if (flag == MAT_GLOBAL_MAX) { 630 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 631 info->nz_used = irecv[0]; 632 info->nz_allocated = irecv[1]; 633 info->nz_unneeded = irecv[2]; 634 info->memory = irecv[3]; 635 info->mallocs = irecv[4]; 636 } else if (flag == MAT_GLOBAL_SUM) { 637 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 638 info->nz_used = irecv[0]; 639 info->nz_allocated = irecv[1]; 640 info->nz_unneeded = irecv[2]; 641 info->memory = irecv[3]; 642 info->mallocs = irecv[4]; 643 } 644 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 645 info->fill_ratio_needed = 0; 646 info->factor_mallocs = 0; 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatSetOption_MPIDense" 652 int MatSetOption_MPIDense(Mat A,MatOption op) 653 { 654 Mat_MPIDense *a = 
(Mat_MPIDense*)A->data; 655 int ierr; 656 657 PetscFunctionBegin; 658 switch (op) { 659 case MAT_NO_NEW_NONZERO_LOCATIONS: 660 case MAT_YES_NEW_NONZERO_LOCATIONS: 661 case MAT_NEW_NONZERO_LOCATION_ERR: 662 case MAT_NEW_NONZERO_ALLOCATION_ERR: 663 case MAT_COLUMNS_SORTED: 664 case MAT_COLUMNS_UNSORTED: 665 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 666 break; 667 case MAT_ROW_ORIENTED: 668 a->roworiented = PETSC_TRUE; 669 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 670 break; 671 case MAT_ROWS_SORTED: 672 case MAT_ROWS_UNSORTED: 673 case MAT_YES_NEW_DIAGONALS: 674 case MAT_USE_HASH_TABLE: 675 PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 676 break; 677 case MAT_COLUMN_ORIENTED: 678 a->roworiented = PETSC_FALSE; 679 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 680 break; 681 case MAT_IGNORE_OFF_PROC_ENTRIES: 682 a->donotstash = PETSC_TRUE; 683 break; 684 case MAT_NO_NEW_DIAGONALS: 685 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 686 default: 687 SETERRQ(PETSC_ERR_SUP,"unknown option"); 688 } 689 PetscFunctionReturn(0); 690 } 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatGetRow_MPIDense" 694 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v) 695 { 696 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 697 int lrow,rstart = mat->rstart,rend = mat->rend,ierr; 698 699 PetscFunctionBegin; 700 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 701 lrow = row - rstart; 702 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatRestoreRow_MPIDense" 708 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 709 { 710 int ierr; 711 712 PetscFunctionBegin; 713 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 714 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatDiagonalScale_MPIDense" 720 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 721 { 722 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 723 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 724 PetscScalar *l,*r,x,*v; 725 int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 726 727 PetscFunctionBegin; 728 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 729 if (ll) { 730 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 731 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size"); 732 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 733 for (i=0; i<m; i++) { 734 x = l[i]; 735 v = mat->v + i; 736 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 737 } 738 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 739 PetscLogFlops(n*m); 740 } 741 if (rr) { 742 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 743 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size"); 744 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 745 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 746 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 747 for (i=0; i<n; i++) { 748 x = r[i]; 749 v = mat->v + i*m; 750 for (j=0; j<m; j++) { (*v++) *= x;} 751 } 752 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 753 PetscLogFlops(n*m); 754 } 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatNorm_MPIDense" 760 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 761 { 762 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 763 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 764 int ierr,i,j; 765 
PetscReal sum = 0.0; 766 PetscScalar *v = mat->v; 767 768 PetscFunctionBegin; 769 if (mdn->size == 1) { 770 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 771 } else { 772 if (type == NORM_FROBENIUS) { 773 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 774 #if defined(PETSC_USE_COMPLEX) 775 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 776 #else 777 sum += (*v)*(*v); v++; 778 #endif 779 } 780 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 781 *nrm = sqrt(*nrm); 782 PetscLogFlops(2*mdn->A->n*mdn->A->m); 783 } else if (type == NORM_1) { 784 PetscReal *tmp,*tmp2; 785 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 786 tmp2 = tmp + A->N; 787 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 788 *nrm = 0.0; 789 v = mat->v; 790 for (j=0; j<mdn->A->n; j++) { 791 for (i=0; i<mdn->A->m; i++) { 792 tmp[j] += PetscAbsScalar(*v); v++; 793 } 794 } 795 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 796 for (j=0; j<A->N; j++) { 797 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 798 } 799 ierr = PetscFree(tmp);CHKERRQ(ierr); 800 PetscLogFlops(A->n*A->m); 801 } else if (type == NORM_INFINITY) { /* max row norm */ 802 PetscReal ntemp; 803 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 804 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 805 } else { 806 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 807 } 808 } 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatTranspose_MPIDense" 814 int MatTranspose_MPIDense(Mat A,Mat *matout) 815 { 816 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 817 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 818 Mat B; 819 int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 820 int j,i,ierr; 821 PetscScalar *v; 822 823 PetscFunctionBegin; 824 if (!matout && M != N) { 825 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 826 } 827 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 828 829 m = a->A->m; n = a->A->n; v = Aloc->v; 830 ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr); 831 for (j=0; j<n; j++) { 832 for (i=0; i<m; i++) rwork[i] = rstart + i; 833 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 834 v += m; 835 } 836 ierr = PetscFree(rwork);CHKERRQ(ierr); 837 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 if (matout) { 840 *matout = B; 841 } else { 842 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 843 } 844 PetscFunctionReturn(0); 845 } 846 847 #include "petscblaslapack.h" 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatScale_MPIDense" 850 int MatScale_MPIDense(const PetscScalar *alpha,Mat inA) 851 { 852 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 853 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 854 int one = 1,nz; 855 856 PetscFunctionBegin; 857 nz = inA->m*inA->N; 858 BLscal_(&nz,(PetscScalar*)alpha,a->v,&one); 859 PetscLogFlops(nz); 860 PetscFunctionReturn(0); 861 } 862 863 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 867 int MatSetUpPreallocation_MPIDense(Mat A) 868 { 869 int ierr; 870 871 PetscFunctionBegin; 872 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 /* -------------------------------------------------------------------*/ 877 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 878 MatGetRow_MPIDense, 879 
MatRestoreRow_MPIDense, 880 MatMult_MPIDense, 881 /* 4*/ MatMultAdd_MPIDense, 882 MatMultTranspose_MPIDense, 883 MatMultTransposeAdd_MPIDense, 884 0, 885 0, 886 0, 887 /*10*/ 0, 888 0, 889 0, 890 0, 891 MatTranspose_MPIDense, 892 /*15*/ MatGetInfo_MPIDense, 893 0, 894 MatGetDiagonal_MPIDense, 895 MatDiagonalScale_MPIDense, 896 MatNorm_MPIDense, 897 /*20*/ MatAssemblyBegin_MPIDense, 898 MatAssemblyEnd_MPIDense, 899 0, 900 MatSetOption_MPIDense, 901 MatZeroEntries_MPIDense, 902 /*25*/ MatZeroRows_MPIDense, 903 0, 904 0, 905 0, 906 0, 907 /*30*/ MatSetUpPreallocation_MPIDense, 908 0, 909 0, 910 MatGetArray_MPIDense, 911 MatRestoreArray_MPIDense, 912 /*35*/ MatDuplicate_MPIDense, 913 0, 914 0, 915 0, 916 0, 917 /*40*/ 0, 918 MatGetSubMatrices_MPIDense, 919 0, 920 MatGetValues_MPIDense, 921 0, 922 /*45*/ 0, 923 MatScale_MPIDense, 924 0, 925 0, 926 0, 927 /*50*/ MatGetBlockSize_MPIDense, 928 0, 929 0, 930 0, 931 0, 932 /*55*/ 0, 933 0, 934 0, 935 0, 936 0, 937 /*60*/ MatGetSubMatrix_MPIDense, 938 MatDestroy_MPIDense, 939 MatView_MPIDense, 940 MatGetPetscMaps_Petsc, 941 0, 942 /*65*/ 0, 943 0, 944 0, 945 0, 946 0, 947 /*70*/ 0, 948 0, 949 0, 950 0, 951 0, 952 /*75*/ 0, 953 0, 954 0, 955 0, 956 0, 957 /*80*/ 0, 958 0, 959 0, 960 0, 961 0, 962 /*85*/ MatLoad_MPIDense}; 963 964 EXTERN_C_BEGIN 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 967 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 968 { 969 Mat_MPIDense *a; 970 int ierr; 971 972 PetscFunctionBegin; 973 mat->preallocated = PETSC_TRUE; 974 /* Note: For now, when data is specified above, this assumes the user correctly 975 allocates the local dense storage space. We should add error checking. */ 976 977 a = (Mat_MPIDense*)mat->data; 978 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr); 979 PetscLogObjectParent(mat,a->A); 980 PetscFunctionReturn(0); 981 } 982 EXTERN_C_END 983 984 /*MC 985 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 986 987 Options Database Keys: 988 . 
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 989 990 Level: beginner 991 992 .seealso: MatCreateMPIDense 993 M*/ 994 995 EXTERN_C_BEGIN 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatCreate_MPIDense" 998 int MatCreate_MPIDense(Mat mat) 999 { 1000 Mat_MPIDense *a; 1001 int ierr,i; 1002 1003 PetscFunctionBegin; 1004 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1005 mat->data = (void*)a; 1006 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1007 mat->factor = 0; 1008 mat->mapping = 0; 1009 1010 a->factor = 0; 1011 mat->insertmode = NOT_SET_VALUES; 1012 ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr); 1013 ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr); 1014 1015 ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr); 1016 ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr); 1017 a->nvec = mat->n; 1018 1019 /* the information in the maps duplicates the information computed below, eventually 1020 we should remove the duplicate information that is not contained in the maps */ 1021 ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr); 1022 ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr); 1023 1024 /* build local table of row and column ownerships */ 1025 ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1026 a->cowners = a->rowners + a->size + 1; 1027 PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1028 ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 1029 a->rowners[0] = 0; 1030 for (i=2; i<=a->size; i++) { 1031 a->rowners[i] += a->rowners[i-1]; 1032 } 1033 a->rstart = a->rowners[a->rank]; 1034 a->rend = a->rowners[a->rank+1]; 1035 ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 1036 a->cowners[0] = 0; 1037 for (i=2; i<=a->size; i++) { 1038 a->cowners[i] += a->cowners[i-1]; 1039 } 1040 1041 /* build cache for off array entries formed */ 1042 a->donotstash = PETSC_FALSE; 1043 ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr); 1044 1045 /* stuff used for matrix vector multiply */ 1046 a->lvec = 0; 1047 a->Mvctx = 0; 1048 a->roworiented = PETSC_TRUE; 1049 1050 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1051 "MatGetDiagonalBlock_MPIDense", 1052 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1053 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1054 "MatMPIDenseSetPreallocation_MPIDense", 1055 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 EXTERN_C_END 1059 1060 /*MC 1061 MATDENSE = "dense" - A matrix type to be used for dense matrices. 1062 1063 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1064 and MATMPIDENSE otherwise. 1065 1066 Options Database Keys: 1067 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1068 1069 Level: beginner 1070 1071 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1072 M*/ 1073 1074 EXTERN_C_BEGIN 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatCreate_Dense" 1077 int MatCreate_Dense(Mat A) { 1078 int ierr,size; 1079 1080 PetscFunctionBegin; 1081 ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr); 1082 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1083 if (size == 1) { 1084 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1085 } else { 1086 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 EXTERN_C_END 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1094 /*@C 1095 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1096 1097 Not collective 1098 1099 Input Parameters: 1100 . A - the matrix 1101 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1102 to control all matrix memory allocation. 1103 1104 Notes: 1105 The dense format is fully compatible with standard Fortran 77 1106 storage by columns. 1107 1108 The data input variable is intended primarily for Fortran programmers 1109 who wish to allocate their own matrix memory space. Most users should 1110 set data=PETSC_NULL. 1111 1112 Level: intermediate 1113 1114 .keywords: matrix,dense, parallel 1115 1116 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1117 @*/ 1118 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1119 { 1120 int ierr,(*f)(Mat,PetscScalar *); 1121 1122 PetscFunctionBegin; 1123 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1124 if (f) { 1125 ierr = (*f)(mat,data);CHKERRQ(ierr); 1126 } 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatCreateMPIDense" 1132 /*@C 1133 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1134 1135 Collective on MPI_Comm 1136 1137 Input Parameters: 1138 + comm - MPI communicator 1139 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1140 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1141 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1142 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1143 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1144 to control all matrix memory allocation. 1145 1146 Output Parameter: 1147 . A - the matrix 1148 1149 Notes: 1150 The dense format is fully compatible with standard Fortran 77 1151 storage by columns. 1152 1153 The data input variable is intended primarily for Fortran programmers 1154 who wish to allocate their own matrix memory space. Most users should 1155 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1156 1157 The user MUST specify either the local or global matrix dimensions 1158 (possibly both). 
1159 1160 Level: intermediate 1161 1162 .keywords: matrix,dense, parallel 1163 1164 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1165 @*/ 1166 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A) 1167 { 1168 int ierr,size; 1169 1170 PetscFunctionBegin; 1171 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1172 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1173 if (size > 1) { 1174 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1175 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1176 } else { 1177 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1178 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1179 } 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatDuplicate_MPIDense" 1185 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1186 { 1187 Mat mat; 1188 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1189 int ierr; 1190 1191 PetscFunctionBegin; 1192 *newmat = 0; 1193 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr); 1194 ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr); 1195 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1196 mat->data = (void*)a; 1197 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1198 mat->factor = A->factor; 1199 mat->assembled = PETSC_TRUE; 1200 mat->preallocated = PETSC_TRUE; 1201 1202 a->rstart = oldmat->rstart; 1203 a->rend = oldmat->rend; 1204 a->size = oldmat->size; 1205 a->rank = oldmat->rank; 1206 mat->insertmode = NOT_SET_VALUES; 1207 a->nvec = oldmat->nvec; 1208 a->donotstash = oldmat->donotstash; 1209 ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1210 PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1211 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 1212 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1213 1214 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1215 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1216 PetscLogObjectParent(mat,a->A); 1217 *newmat = mat; 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #include "petscsys.h" 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1225 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat) 1226 { 1227 int *rowners,i,size,rank,m,ierr,nz,j; 1228 PetscScalar *array,*vals,*vals_ptr; 1229 MPI_Status status; 1230 1231 PetscFunctionBegin; 1232 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1233 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1234 1235 /* determine ownership of all rows */ 1236 m = M/size + ((M % size) > rank); 1237 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1238 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1239 rowners[0] = 0; 1240 for (i=2; i<=size; i++) { 1241 rowners[i] += rowners[i-1]; 1242 } 1243 1244 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1245 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1246 1247 if (!rank) { 1248 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1249 1250 /* read in my part of the matrix numerical values */ 1251 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1252 1253 /* insert into matrix-by row (this is why cannot directly read into array */ 1254 vals_ptr = vals; 1255 for (i=0; i<m; i++) { 1256 for (j=0; j<N; j++) { 1257 array[i + j*m] = *vals_ptr++; 
1258 } 1259 } 1260 1261 /* read in other processors and ship out */ 1262 for (i=1; i<size; i++) { 1263 nz = (rowners[i+1] - rowners[i])*N; 1264 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1265 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1266 } 1267 } else { 1268 /* receive numeric values */ 1269 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1270 1271 /* receive message of values*/ 1272 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1273 1274 /* insert into matrix-by row (this is why cannot directly read into array */ 1275 vals_ptr = vals; 1276 for (i=0; i<m; i++) { 1277 for (j=0; j<N; j++) { 1278 array[i + j*m] = *vals_ptr++; 1279 } 1280 } 1281 } 1282 ierr = PetscFree(rowners);CHKERRQ(ierr); 1283 ierr = PetscFree(vals);CHKERRQ(ierr); 1284 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1285 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatLoad_MPIDense" 1291 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat) 1292 { 1293 Mat A; 1294 PetscScalar *vals,*svals; 1295 MPI_Comm comm = ((PetscObject)viewer)->comm; 1296 MPI_Status status; 1297 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1298 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1299 int tag = ((PetscObject)viewer)->tag; 1300 int i,nz,ierr,j,rstart,rend,fd; 1301 1302 PetscFunctionBegin; 1303 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1304 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1305 if (!rank) { 1306 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1307 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1308 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1309 } 1310 1311 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1312 M = header[1]; N = header[2]; nz = header[3]; 1313 1314 /* 1315 Handle case where matrix is stored on disk as a dense matrix 1316 */ 1317 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1318 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 /* determine ownership of all rows */ 1323 m = M/size + ((M % size) > rank); 1324 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1325 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1326 rowners[0] = 0; 1327 for (i=2; i<=size; i++) { 1328 rowners[i] += rowners[i-1]; 1329 } 1330 rstart = rowners[rank]; 1331 rend = rowners[rank+1]; 1332 1333 /* distribute row lengths to all processors */ 1334 ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr); 1335 offlens = ourlens + (rend-rstart); 1336 if (!rank) { 1337 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1338 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1339 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1340 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1341 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1342 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1343 } else { 1344 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1345 } 1346 1347 if (!rank) { 1348 /* calculate the number of nonzeros on each processor */ 1349 ierr = 
PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1350 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1351 for (i=0; i<size; i++) { 1352 for (j=rowners[i]; j< rowners[i+1]; j++) { 1353 procsnz[i] += rowlengths[j]; 1354 } 1355 } 1356 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1357 1358 /* determine max buffer needed and allocate it */ 1359 maxnz = 0; 1360 for (i=0; i<size; i++) { 1361 maxnz = PetscMax(maxnz,procsnz[i]); 1362 } 1363 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1364 1365 /* read in my part of the matrix column indices */ 1366 nz = procsnz[0]; 1367 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1368 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1369 1370 /* read in every one elses and ship off */ 1371 for (i=1; i<size; i++) { 1372 nz = procsnz[i]; 1373 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1374 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1375 } 1376 ierr = PetscFree(cols);CHKERRQ(ierr); 1377 } else { 1378 /* determine buffer space needed for message */ 1379 nz = 0; 1380 for (i=0; i<m; i++) { 1381 nz += ourlens[i]; 1382 } 1383 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1384 1385 /* receive message of column indices*/ 1386 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1387 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1388 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1389 } 1390 1391 /* loop over local rows, determining number of off diagonal entries */ 1392 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1393 jj = 0; 1394 for (i=0; i<m; i++) { 1395 for (j=0; j<ourlens[i]; j++) { 1396 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1397 jj++; 1398 } 1399 } 1400 1401 /* create our matrix */ 1402 for (i=0; i<m; i++) { 1403 ourlens[i] -= offlens[i]; 1404 } 1405 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1406 A = *newmat; 1407 for (i=0; i<m; i++) { 1408 ourlens[i] += offlens[i]; 1409 } 1410 1411 if (!rank) { 1412 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1413 1414 /* read in my part of the matrix numerical values */ 1415 nz = procsnz[0]; 1416 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1417 1418 /* insert into matrix */ 1419 jj = rstart; 1420 smycols = mycols; 1421 svals = vals; 1422 for (i=0; i<m; i++) { 1423 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1424 smycols += ourlens[i]; 1425 svals += ourlens[i]; 1426 jj++; 1427 } 1428 1429 /* read in other processors and ship out */ 1430 for (i=1; i<size; i++) { 1431 nz = procsnz[i]; 1432 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1433 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1436 } else { 1437 /* receive numeric values */ 1438 ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1439 1440 /* receive message of values*/ 1441 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1442 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1443 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1444 1445 /* insert into matrix */ 1446 jj = rstart; 1447 smycols = mycols; 1448 svals = vals; 1449 for (i=0; i<m; i++) { 1450 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1451 smycols += ourlens[i]; 1452 svals += 
ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
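
/*
   Example usage (a minimal sketch, not part of the library build): creating,
   filling, and applying an MPI dense matrix with the routines defined above.
   The program below is illustrative only; the matrix size n, the entry values,
   and the use of PETSC_DECIDE for the local sizes are arbitrary choices, and
   error checking beyond CHKERRQ is omitted.
*/
#if 0
#include "petscmat.h"

int main(int argc,char **argv)
{
  Mat         A;
  Vec         x,y;
  int         ierr,i,j,rstart,rend,n = 10;
  PetscScalar v;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);

  /* let PETSc choose the local row/column distribution of the n x n matrix */
  ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,n,n,PETSC_NULL,&A);CHKERRQ(ierr);

  /* each process sets only the rows it owns, so nothing is stashed */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<n; j++) {
      v    = 1.0/(i+j+1);
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* apply the matrix to a vector of ones */
  ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,n,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  v    = 1.0;
  ierr = VecSet(&v,x);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);

  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  ierr = PetscFinalize();CHKERRQ(ierr);
  return 0;
}
#endif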