/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
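/*
   Usage sketch for the routines above (not part of this file's API surface):
   callers use global indices with MatSetValues(); rows owned by another
   process are stashed and routed during assembly. Assumes a matrix `mat`
   already created as MPIDENSE; error checking is elided for brevity.

     int         row = 0,col = 0;
     PetscScalar val = 1.0;
     MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/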
#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each
     index set to confirm that it is OK. Currently supports only a submatrix
     with the same partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *nprocs,j,idx,nsends;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                                                (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* rwork holds m global row indices, one per local row */
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
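/*
   Usage sketch for the type above: it can be selected at runtime with
   -mat_type mpidense, or programmatically as below. This is an illustrative
   fragment, not part of this file; M and N are assumed to be defined and
   error checking is elided.

     Mat mat;
     MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&mat);
     MatSetType(mat,MATMPIDENSE);
     MatSetFromOptions(mat);
*/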
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/
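/*
   Usage sketch for MATDENSE: the same code yields MATSEQDENSE on one process
   and MATMPIDENSE on several, depending on the communicator, as implemented
   in MatCreate_Dense() below. Illustrative fragment only; error checking is
   elided.

     Mat dense;
     MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&dense);
     MatSetType(dense,MATDENSE);
*/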
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
int MatCreate_Dense(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  mat - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  int ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
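/*
   Usage sketch for MatMPIDenseSetPreallocation(): most callers pass
   PETSC_NULL and let PETSc allocate the local m x N column-major block;
   a user-supplied array must have exactly that shape. Illustrative fragment
   only; `m`, `N`, and the array `mydata` are assumed to be defined by the
   caller, and error checking is elided.

     Mat mat;
     MatCreate(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DECIDE,N,&mat);
     MatSetType(mat,MATMPIDENSE);
     MatMPIDenseSetPreallocation(mat,mydata);
*/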
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
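/*
   Usage sketch for MatCreateMPIDense(): create a global M x N dense matrix,
   letting PETSc pick the row distribution and manage storage. Illustrative
   fragment only; M and N are assumed to be defined and error checking is
   elided.

     Mat         A;
     int         row = 0,col = 0;
     PetscScalar one = 1.0;
     MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
     MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/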
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)viewer)->tag;
  int         i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
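/*
   Usage sketch for MatLoad() with this type: open a PETSc binary viewer on a
   file previously written with MatView(), then load. Illustrative fragment
   only; the file name is hypothetical, the exact open-mode flag name varies
   across PETSc versions, and error checking is elided.

     Mat         B;
     PetscViewer fd;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&fd);
     MatLoad(fd,MATMPIDENSE,&B);
     PetscViewerDestroy(fd);
*/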