/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  /* The local block is stored by columns, so the local diagonal block begins at column rstart */
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: cache the values in the stash until assembly */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
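/*
   The row- and column-oriented layouts accepted by MatSetValues_MPIDense()
   above are easy to confuse, so the following sketch (illustration only, not
   part of the library; a00,a01,a10,a11 stand for the numerical entries of a
   2 x 2 block) shows how the v[] array must be ordered in each case:

       int         rows[2] = {0,1},cols[2] = {4,5};
       PetscScalar v[4];

       v[0] = a00; v[1] = a01; v[2] = a10; v[3] = a11;   (row by row: v[i*n+j])
       MatSetValues(mat,2,rows,2,cols,v,INSERT_VALUES);

       MatSetOption(mat,MAT_COLUMN_ORIENTED);
       v[0] = a00; v[1] = a10; v[2] = a01; v[3] = a11;   (column by column: v[i+j*m])
       MatSetValues(mat,2,rows,2,cols,v,INSERT_VALUES);
*/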
#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test the row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
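/*
   Assembly is what ties MatSetValues_MPIDense() and the stash together:
   entries destined for rows owned by another process are cached in
   mat->stash and only shipped to their owners by the assembly pair below.
   A minimal usage sketch (assuming A is an already-created MPIDense matrix
   and row may be owned by any process):

       MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);   (may go to the stash)
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);          (ships stashed entries)
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);            (receives and inserts them)
*/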
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not go through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *nprocs,j,idx,nsends;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
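/*
   The multiply routines above and below all follow the same pattern: the
   distributed input vector is first gathered into the local work vector
   mdn->lvec (whose length is the global column count, since each process
   stores its full m x N row block), and the product itself is then a purely
   local dense operation; the transpose versions run the scatter in reverse
   and accumulate with ADD_VALUES.
*/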
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    /* the local block is stored by columns, so entry (i,rstart+i) lives at v[rstart*m + i*m + i] */
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                                                (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N; /* each process stores all N columns of its row block */
  info->block_size     = 1.0;
  ierr     = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      /* local column sums, then a global sum reduction and a max over the columns */
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr); /* must hold m row indices */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the local block becomes (global) row j of the transpose */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  /* the local block holds all m local rows and the full set of N global columns */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
EXTERN_C_END
/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-processor entries */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
int MatCreate_Dense(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  int ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
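/*
   A minimal calling sketch for MatMPIDenseSetPreallocation() (the sizes M,N
   and the create/set-type sequence are illustrative; error checking omitted):

       Mat A;
       MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
       MatSetType(A,MATMPIDENSE);
       MatMPIDenseSetPreallocation(A,PETSC_NULL);   (PETSc allocates the m x N local block)
*/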
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
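/*
   Usage sketch for MatCreateMPIDense() (illustrative sizes; error checking
   omitted). When a data array is supplied it must be the local m x N block
   stored by columns, consistent with the Fortran-style layout described in
   the manual page above:

       Mat A;
       MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
       ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual ...
       MatDestroy(A);
*/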
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)viewer)->tag;
  int         i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
     (fd is only meaningful, and only used, on the first processor)
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }
  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
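/*
   A minimal sketch of driving MatLoad_MPIDense() through the generic
   MatLoad() interface (the file name is illustrative, and the exact open
   flag of PetscViewerBinaryOpen() should be checked against this release's
   manual page):

       PetscViewer viewer;
       Mat         A;
       PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);
       MatLoad(viewer,MATMPIDENSE,&A);
       PetscViewerDestroy(viewer);
*/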