1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/ 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" 8 #include "src/vec/vecimpl.h" 9 10 EXTERN_C_BEGIN 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 14 { 15 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 16 int m = A->m,rstart = mdn->rstart,ierr; 17 PetscScalar *array; 18 MPI_Comm comm; 19 20 PetscFunctionBegin; 21 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 22 23 /* The reuse aspect is not implemented efficiently */ 24 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 25 26 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 27 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 28 ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr); 29 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 30 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 32 33 *iscopy = PETSC_TRUE; 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatSetValues_MPIDense" 40 int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv) 41 { 42 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 43 int ierr,i,j,rstart = A->rstart,rend = A->rend,row; 44 PetscTruth roworiented = A->roworiented; 45 46 PetscFunctionBegin; 47 for (i=0; i<m; i++) { 48 if (idxm[i] < 0) continue; 49 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 50 if (idxm[i] >= rstart && idxm[i] < rend) { 51 row = idxm[i] - rstart; 52 if (roworiented) { 53 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 54 } else { 55 for (j=0; j<n; j++) { 56 if (idxn[j] < 0) continue; 57 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 58 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 59 } 60 } 61 } else { 62 if (!A->donotstash) { 63 if (roworiented) { 64 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 65 } else { 66 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 67 } 68 } 69 } 70 } 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetValues_MPIDense" 76 int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 79 int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row; 80 81 PetscFunctionBegin; 82 for (i=0; i<m; i++) { 83 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 84 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 85 if (idxm[i] >= rstart && idxm[i] < rend) { 86 row = idxm[i] - rstart; 87 for (j=0; j<n; j++) { 88 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 89 if (idxn[j] >= mat->N) { 90 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 91 } 92 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 93 } 94 } else { 95 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 96 } 97 } 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatGetArray_MPIDense" 103 int MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 104 { 105 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 #undef __FUNCT__ 114 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 115 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 116 { 117 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 118 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 119 int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 120 PetscScalar *av,*bv,*v = lmat->v; 121 Mat newmat; 122 123 PetscFunctionBegin; 124 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 125 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 126 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 127 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 128 129 /* No parallel redistribution currently supported! Should really check each index set 130 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 131 original matrix! */ 132 133 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 134 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 135 136 /* Check submatrix call */ 137 if (scall == MAT_REUSE_MATRIX) { 138 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 139 /* Really need to test rows and column sizes! */ 140 newmat = *B; 141 } else { 142 /* Create and fill new matrix */ 143 ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr); 144 } 145 146 /* Now extract the data pointers and do the copy, column at a time */ 147 newmatd = (Mat_MPIDense*)newmat->data; 148 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 149 150 for (i=0; i<ncols; i++) { 151 av = v + nlrows*icol[i]; 152 for (j=0; j<nrows; j++) { 153 *bv++ = av[irow[j] - rstart]; 154 } 155 } 156 157 /* Assemble the matrices so that the correct flags are set */ 158 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 161 /* Free work space */ 162 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 163 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 164 *B = newmat; 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatRestoreArray_MPIDense" 170 int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 171 { 172 PetscFunctionBegin; 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 178 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 179 { 180 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 181 MPI_Comm comm = mat->comm; 182 int ierr,nstash,reallocs; 183 InsertMode addv; 184 185 PetscFunctionBegin; 186 /* make sure all processors are either in INSERTMODE or ADDMODE */ 187 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 188 if (addv == (ADD_VALUES|INSERT_VALUES)) { 189 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 190 } 191 mat->insertmode = addv; /* in case this processor had no cache */ 192 193 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 194 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 195 PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 201 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 202 { 203 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 204 int i,n,ierr,*row,*col,flg,j,rstart,ncols; 205 PetscScalar *val; 206 InsertMode addv=mat->insertmode; 207 208 PetscFunctionBegin; 209 /* wait on receives */ 210 while (1) { 211 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 212 if (!flg) break; 213 214 for (i=0; i<n;) { 215 /* Now identify the consecutive vals belonging to the same row */ 216 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 217 if (j < n) ncols = j-i; 218 else ncols = n-i; 219 /* Now assemble all these values with a single function call */ 220 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 221 i = j; 222 } 223 } 224 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 225 226 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 227 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 228 229 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 230 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 231 } 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatZeroEntries_MPIDense" 237 int MatZeroEntries_MPIDense(Mat A) 238 { 239 int ierr; 240 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 241 242 PetscFunctionBegin; 243 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatGetBlockSize_MPIDense" 249 int MatGetBlockSize_MPIDense(Mat A,int *bs) 250 { 251 PetscFunctionBegin; 252 *bs = 1; 253 PetscFunctionReturn(0); 254 } 255 256 /* the code does not do the diagonal entries correctly unless the 257 matrix is square and the column and row owerships are identical. 258 This is a BUG. The only way to fix it seems to be to access 259 mdn->A and mdn->B directly and not through the MatZeroRows() 260 routine. 261 */ 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatZeroRows_MPIDense" 264 int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag) 265 { 266 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 267 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 268 int *nprocs,j,idx,nsends; 269 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 270 int *rvalues,tag = A->tag,count,base,slen,n,*source; 271 int *lens,imdex,*lrows,*values; 272 MPI_Comm comm = A->comm; 273 MPI_Request *send_waits,*recv_waits; 274 MPI_Status recv_status,*send_status; 275 IS istmp; 276 PetscTruth found; 277 278 PetscFunctionBegin; 279 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 280 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 281 282 /* first count number of contributors to each processor */ 283 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 284 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 285 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 286 for (i=0; i<N; i++) { 287 idx = rows[i]; 288 found = PETSC_FALSE; 289 for (j=0; j<size; j++) { 290 if (idx >= owners[j] && idx < owners[j+1]) { 291 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 292 } 293 } 294 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 295 } 296 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 297 298 /* inform other processors of number of messages and max length*/ 299 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 300 301 /* post receives: */ 302 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 303 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 304 for (i=0; i<nrecvs; i++) { 305 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 306 } 307 308 /* do sends: 309 1) starts[i] gives the starting index in svalues for stuff going to 310 the ith processor 311 */ 312 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 313 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 314 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 315 starts[0] = 0; 316 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 317 for (i=0; i<N; i++) { 318 svalues[starts[owner[i]]++] = rows[i]; 319 } 320 ISRestoreIndices(is,&rows); 321 322 starts[0] = 0; 323 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 324 count = 0; 325 for (i=0; i<size; i++) { 326 if (nprocs[2*i+1]) { 327 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 328 } 329 } 330 ierr = PetscFree(starts);CHKERRQ(ierr); 331 332 base = owners[rank]; 333 334 /* wait on receives */ 335 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 336 source = lens + nrecvs; 337 count = nrecvs; slen = 0; 338 while (count) { 339 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 340 /* unpack receives into our local space */ 341 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 342 source[imdex] = recv_status.MPI_SOURCE; 343 lens[imdex] = n; 344 slen += n; 345 count--; 346 } 347 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 348 349 /* move the data into the send scatter */ 350 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 351 count = 0; 352 for (i=0; i<nrecvs; i++) { 353 values = rvalues + i*nmax; 354 for (j=0; j<lens[i]; j++) { 355 lrows[count++] = values[j] - base; 356 } 357 } 358 ierr = PetscFree(rvalues);CHKERRQ(ierr); 359 ierr = PetscFree(lens);CHKERRQ(ierr); 360 ierr = PetscFree(owner);CHKERRQ(ierr); 361 ierr = PetscFree(nprocs);CHKERRQ(ierr); 362 363 /* actually zap the local rows */ 364 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 365 PetscLogObjectParent(A,istmp); 366 ierr = PetscFree(lrows);CHKERRQ(ierr); 367 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 368 ierr = ISDestroy(istmp);CHKERRQ(ierr); 369 370 /* wait on sends */ 371 if (nsends) { 372 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 373 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 374 ierr = PetscFree(send_status);CHKERRQ(ierr); 375 } 376 ierr = PetscFree(send_waits);CHKERRQ(ierr); 377 ierr = PetscFree(svalues);CHKERRQ(ierr); 378 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatMult_MPIDense" 384 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 385 { 386 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 387 int ierr; 388 389 PetscFunctionBegin; 390 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 391 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 392 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatMultAdd_MPIDense" 398 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 399 { 400 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 401 int ierr; 402 403 PetscFunctionBegin; 404 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 405 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 406 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatMultTranspose_MPIDense" 412 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 413 { 414 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 415 int ierr; 416 PetscScalar zero = 0.0; 417 418 PetscFunctionBegin; 419 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 420 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 421 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 422 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 428 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 429 { 430 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 431 int ierr; 432 433 PetscFunctionBegin; 434 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 435 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 436 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 437 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatGetDiagonal_MPIDense" 443 int MatGetDiagonal_MPIDense(Mat A,Vec v) 444 { 445 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 446 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 447 int ierr,len,i,n,m = A->m,radd; 448 PetscScalar *x,zero = 0.0; 449 450 PetscFunctionBegin; 451 ierr = VecSet(&zero,v);CHKERRQ(ierr); 452 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 453 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 454 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 455 len = PetscMin(a->A->m,a->A->n); 456 radd = a->rstart*m; 457 for (i=0; i<len; i++) { 458 x[i] = aloc->v[radd + i*m + i]; 459 } 460 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatDestroy_MPIDense" 466 int MatDestroy_MPIDense(Mat mat) 467 { 468 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 469 int ierr; 470 471 PetscFunctionBegin; 472 473 #if defined(PETSC_USE_LOG) 474 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 475 #endif 476 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 477 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 478 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 479 if (mdn->lvec) VecDestroy(mdn->lvec); 480 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 481 if (mdn->factor) { 482 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 483 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 484 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 485 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 486 } 487 ierr = PetscFree(mdn);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatView_MPIDense_Binary" 493 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 494 { 495 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 496 int ierr; 497 498 PetscFunctionBegin; 499 if (mdn->size == 1) { 500 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 501 } 502 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 508 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 509 { 510 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 511 int ierr,size = mdn->size,rank = mdn->rank; 512 PetscViewerType vtype; 513 PetscTruth isascii,isdraw; 514 PetscViewer sviewer; 515 PetscViewerFormat format; 516 517 PetscFunctionBegin; 518 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 519 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 520 if (isascii) { 521 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 522 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 523 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 524 MatInfo info; 525 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 526 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m, 527 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 528 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 529 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 530 PetscFunctionReturn(0); 531 } else if (format == PETSC_VIEWER_ASCII_INFO) { 532 PetscFunctionReturn(0); 533 } 534 } else if (isdraw) { 535 PetscDraw draw; 536 PetscTruth isnull; 537 538 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 539 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 540 if (isnull) PetscFunctionReturn(0); 541 } 542 543 if (size == 1) { 544 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 545 } else { 546 /* assemble the entire matrix onto first processor. */ 547 Mat A; 548 int M = mat->M,N = mat->N,m,row,i,nz,*cols; 549 PetscScalar *vals; 550 551 if (!rank) { 552 ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 553 } else { 554 ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 555 } 556 PetscLogObjectParent(mat,A); 557 558 /* Copy the matrix ... This isn't the most efficient means, 559 but it's quick for now */ 560 row = mdn->rstart; m = mdn->A->m; 561 for (i=0; i<m; i++) { 562 ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 563 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 564 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 565 row++; 566 } 567 568 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 569 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 571 if (!rank) { 572 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 573 } 574 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 575 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 576 ierr = MatDestroy(A);CHKERRQ(ierr); 577 } 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatView_MPIDense" 583 int MatView_MPIDense(Mat mat,PetscViewer viewer) 584 { 585 int ierr; 586 PetscTruth isascii,isbinary,isdraw,issocket; 587 588 PetscFunctionBegin; 589 590 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 591 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 592 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 594 595 if (isascii || issocket || isdraw) { 596 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 597 } else if (isbinary) { 598 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 599 } else { 600 SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 601 } 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatGetInfo_MPIDense" 607 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 608 { 609 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 610 Mat mdn = mat->A; 611 int ierr; 612 PetscReal isend[5],irecv[5]; 613 614 PetscFunctionBegin; 615 info->rows_global = (double)A->M; 616 info->columns_global = (double)A->N; 617 info->rows_local = (double)A->m; 618 info->columns_local = (double)A->N; 619 info->block_size = 1.0; 620 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 621 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 622 isend[3] = info->memory; isend[4] = info->mallocs; 623 if (flag == MAT_LOCAL) { 624 info->nz_used = isend[0]; 625 info->nz_allocated = isend[1]; 626 info->nz_unneeded = isend[2]; 627 info->memory = isend[3]; 628 info->mallocs = isend[4]; 629 } else if (flag == MAT_GLOBAL_MAX) { 630 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 631 info->nz_used = irecv[0]; 632 info->nz_allocated = irecv[1]; 633 info->nz_unneeded = irecv[2]; 634 info->memory = irecv[3]; 635 info->mallocs = irecv[4]; 636 } else if (flag == MAT_GLOBAL_SUM) { 637 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 638 info->nz_used = irecv[0]; 639 info->nz_allocated = irecv[1]; 640 info->nz_unneeded = irecv[2]; 641 info->memory = irecv[3]; 642 info->mallocs = irecv[4]; 643 } 644 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 645 info->fill_ratio_needed = 0; 646 info->factor_mallocs = 0; 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatSetOption_MPIDense" 652 int MatSetOption_MPIDense(Mat A,MatOption op) 653 { 654 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 655 int ierr; 656 657 PetscFunctionBegin; 658 switch (op) { 659 case MAT_NO_NEW_NONZERO_LOCATIONS: 660 case MAT_YES_NEW_NONZERO_LOCATIONS: 661 case MAT_NEW_NONZERO_LOCATION_ERR: 662 case MAT_NEW_NONZERO_ALLOCATION_ERR: 663 case MAT_COLUMNS_SORTED: 664 case MAT_COLUMNS_UNSORTED: 665 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 666 break; 667 case MAT_ROW_ORIENTED: 668 a->roworiented = PETSC_TRUE; 669 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 670 break; 671 case MAT_ROWS_SORTED: 672 case MAT_ROWS_UNSORTED: 673 case MAT_YES_NEW_DIAGONALS: 674 case MAT_USE_HASH_TABLE: 675 PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 676 break; 677 case MAT_COLUMN_ORIENTED: 678 a->roworiented = PETSC_FALSE; 679 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 680 break; 681 case MAT_IGNORE_OFF_PROC_ENTRIES: 682 a->donotstash = PETSC_TRUE; 683 break; 684 case MAT_NO_NEW_DIAGONALS: 685 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 686 case MAT_SYMMETRIC: 687 case MAT_STRUCTURALLY_SYMMETRIC: 688 case MAT_NOT_SYMMETRIC: 689 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 690 case MAT_HERMITIAN: 691 case MAT_NOT_HERMITIAN: 692 case MAT_SYMMETRY_ETERNAL: 693 case MAT_NOT_SYMMETRY_ETERNAL: 694 break; 695 default: 696 SETERRQ(PETSC_ERR_SUP,"unknown option"); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatGetRow_MPIDense" 703 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v) 704 { 705 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 706 int lrow,rstart = mat->rstart,rend = mat->rend,ierr; 707 708 PetscFunctionBegin; 709 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 710 lrow = row - rstart; 711 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatRestoreRow_MPIDense" 717 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 718 { 719 int ierr; 720 721 PetscFunctionBegin; 722 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 723 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatDiagonalScale_MPIDense" 729 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 730 { 731 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 732 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 733 PetscScalar *l,*r,x,*v; 734 int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 735 736 PetscFunctionBegin; 737 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 738 if (ll) { 739 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 740 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size"); 741 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 742 for (i=0; i<m; i++) { 743 x = l[i]; 744 v = mat->v + i; 745 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 746 } 747 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 748 PetscLogFlops(n*m); 749 } 750 if (rr) { 751 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 752 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size"); 753 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 754 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 755 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 756 for (i=0; i<n; i++) { 757 x = r[i]; 758 v = mat->v + i*m; 759 for (j=0; j<m; j++) { (*v++) *= x;} 760 } 761 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 762 PetscLogFlops(n*m); 763 } 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatNorm_MPIDense" 769 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 770 { 771 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 772 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 773 int ierr,i,j; 774 PetscReal sum = 0.0; 775 PetscScalar *v = mat->v; 776 777 PetscFunctionBegin; 778 if (mdn->size == 1) { 779 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 780 } else { 781 if (type == NORM_FROBENIUS) { 782 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 783 #if defined(PETSC_USE_COMPLEX) 784 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 785 #else 786 sum += (*v)*(*v); v++; 787 #endif 788 } 789 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 790 *nrm = sqrt(*nrm); 791 PetscLogFlops(2*mdn->A->n*mdn->A->m); 792 } else if (type == NORM_1) { 793 PetscReal *tmp,*tmp2; 794 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 795 tmp2 = tmp + A->N; 796 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 797 *nrm = 0.0; 798 v = mat->v; 799 for (j=0; j<mdn->A->n; j++) { 800 for (i=0; i<mdn->A->m; i++) { 801 tmp[j] += PetscAbsScalar(*v); v++; 802 } 803 } 804 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 805 for (j=0; j<A->N; j++) { 806 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 807 } 808 ierr = PetscFree(tmp);CHKERRQ(ierr); 809 PetscLogFlops(A->n*A->m); 810 } else if (type == NORM_INFINITY) { /* max row norm */ 811 PetscReal ntemp; 812 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 813 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 814 } else { 815 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 816 } 817 } 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatTranspose_MPIDense" 823 int MatTranspose_MPIDense(Mat A,Mat *matout) 824 { 825 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 826 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 827 Mat B; 828 int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 829 int j,i,ierr; 830 PetscScalar *v; 831 832 PetscFunctionBegin; 833 if (!matout && M != N) { 834 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 835 } 836 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 837 838 m = a->A->m; n = a->A->n; v = Aloc->v; 839 ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr); 840 for (j=0; j<n; j++) { 841 for (i=0; i<m; i++) rwork[i] = rstart + i; 842 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 843 v += m; 844 } 845 ierr = PetscFree(rwork);CHKERRQ(ierr); 846 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 847 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 848 if (matout) { 849 *matout = B; 850 } else { 851 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 852 } 853 PetscFunctionReturn(0); 854 } 855 856 #include "petscblaslapack.h" 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatScale_MPIDense" 859 int MatScale_MPIDense(const PetscScalar *alpha,Mat inA) 860 { 861 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 862 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 863 int one = 1,nz; 864 865 PetscFunctionBegin; 866 nz = inA->m*inA->N; 867 BLscal_(&nz,(PetscScalar*)alpha,a->v,&one); 868 PetscLogFlops(nz); 869 PetscFunctionReturn(0); 870 } 871 872 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 876 int MatSetUpPreallocation_MPIDense(Mat A) 877 { 878 int ierr; 879 880 PetscFunctionBegin; 881 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 885 /* -------------------------------------------------------------------*/ 886 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 887 MatGetRow_MPIDense, 888 MatRestoreRow_MPIDense, 889 MatMult_MPIDense, 890 /* 4*/ MatMultAdd_MPIDense, 891 MatMultTranspose_MPIDense, 892 MatMultTransposeAdd_MPIDense, 893 0, 894 0, 895 0, 896 /*10*/ 0, 897 0, 898 0, 899 0, 900 MatTranspose_MPIDense, 901 /*15*/ MatGetInfo_MPIDense, 902 0, 903 MatGetDiagonal_MPIDense, 904 MatDiagonalScale_MPIDense, 905 MatNorm_MPIDense, 906 /*20*/ MatAssemblyBegin_MPIDense, 907 MatAssemblyEnd_MPIDense, 908 0, 909 MatSetOption_MPIDense, 910 MatZeroEntries_MPIDense, 911 /*25*/ MatZeroRows_MPIDense, 912 0, 913 0, 914 0, 915 0, 916 /*30*/ MatSetUpPreallocation_MPIDense, 917 0, 918 0, 919 MatGetArray_MPIDense, 920 MatRestoreArray_MPIDense, 921 /*35*/ MatDuplicate_MPIDense, 922 0, 923 0, 924 0, 925 0, 926 /*40*/ 0, 927 MatGetSubMatrices_MPIDense, 928 0, 929 MatGetValues_MPIDense, 930 0, 931 /*45*/ 0, 932 MatScale_MPIDense, 933 0, 934 0, 935 0, 936 /*50*/ MatGetBlockSize_MPIDense, 937 0, 938 0, 939 0, 940 0, 941 /*55*/ 0, 942 0, 943 0, 944 0, 945 0, 946 /*60*/ MatGetSubMatrix_MPIDense, 947 MatDestroy_MPIDense, 948 MatView_MPIDense, 949 MatGetPetscMaps_Petsc, 950 0, 951 /*65*/ 0, 952 0, 953 0, 954 0, 955 0, 956 /*70*/ 0, 957 0, 958 0, 959 0, 960 0, 961 /*75*/ 0, 962 0, 963 0, 964 0, 965 0, 966 /*80*/ 0, 967 0, 968 0, 969 0, 970 0, 971 /*85*/ MatLoad_MPIDense}; 972 973 EXTERN_C_BEGIN 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 976 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 977 { 978 Mat_MPIDense *a; 979 int ierr; 980 981 PetscFunctionBegin; 982 mat->preallocated = PETSC_TRUE; 983 /* Note: For now, when data is specified above, this assumes the user correctly 984 allocates the local dense storage space. We should add error checking. */ 985 986 a = (Mat_MPIDense*)mat->data; 987 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr); 988 PetscLogObjectParent(mat,a->A); 989 PetscFunctionReturn(0); 990 } 991 EXTERN_C_END 992 993 /*MC 994 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 995 996 Options Database Keys: 997 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 998 999 Level: beginner 1000 1001 .seealso: MatCreateMPIDense 1002 M*/ 1003 1004 EXTERN_C_BEGIN 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatCreate_MPIDense" 1007 int MatCreate_MPIDense(Mat mat) 1008 { 1009 Mat_MPIDense *a; 1010 int ierr,i; 1011 1012 PetscFunctionBegin; 1013 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1014 mat->data = (void*)a; 1015 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1016 mat->factor = 0; 1017 mat->mapping = 0; 1018 1019 a->factor = 0; 1020 mat->insertmode = NOT_SET_VALUES; 1021 ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr); 1022 ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr); 1023 1024 ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr); 1025 ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr); 1026 a->nvec = mat->n; 1027 1028 /* the information in the maps duplicates the information computed below, eventually 1029 we should remove the duplicate information that is not contained in the maps */ 1030 ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr); 1031 ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr); 1032 1033 /* build local table of row and column ownerships */ 1034 ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1035 a->cowners = a->rowners + a->size + 1; 1036 PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1037 ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 1038 a->rowners[0] = 0; 1039 for (i=2; i<=a->size; i++) { 1040 a->rowners[i] += a->rowners[i-1]; 1041 } 1042 a->rstart = a->rowners[a->rank]; 1043 a->rend = a->rowners[a->rank+1]; 1044 ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 1045 a->cowners[0] = 0; 1046 for (i=2; i<=a->size; i++) { 1047 a->cowners[i] += a->cowners[i-1]; 1048 } 1049 1050 /* build cache for off array entries formed */ 1051 a->donotstash = PETSC_FALSE; 1052 ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr); 1053 1054 /* stuff used for matrix vector multiply */ 1055 a->lvec = 0; 1056 a->Mvctx = 0; 1057 a->roworiented = PETSC_TRUE; 1058 1059 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1060 "MatGetDiagonalBlock_MPIDense", 1061 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1062 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1063 "MatMPIDenseSetPreallocation_MPIDense", 1064 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 EXTERN_C_END 1068 1069 /*MC 1070 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1071 1072 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1073 and MATMPIDENSE otherwise. 1074 1075 Options Database Keys: 1076 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1077 1078 Level: beginner 1079 1080 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1081 M*/ 1082 1083 EXTERN_C_BEGIN 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatCreate_Dense" 1086 int MatCreate_Dense(Mat A) { 1087 int ierr,size; 1088 1089 PetscFunctionBegin; 1090 ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr); 1091 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1092 if (size == 1) { 1093 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1094 } else { 1095 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1096 } 1097 PetscFunctionReturn(0); 1098 } 1099 EXTERN_C_END 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1103 /*@C 1104 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1105 1106 Not collective 1107 1108 Input Parameters: 1109 . A - the matrix 1110 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1111 to control all matrix memory allocation. 1112 1113 Notes: 1114 The dense format is fully compatible with standard Fortran 77 1115 storage by columns. 1116 1117 The data input variable is intended primarily for Fortran programmers 1118 who wish to allocate their own matrix memory space. Most users should 1119 set data=PETSC_NULL. 1120 1121 Level: intermediate 1122 1123 .keywords: matrix,dense, parallel 1124 1125 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1126 @*/ 1127 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1128 { 1129 int ierr,(*f)(Mat,PetscScalar *); 1130 1131 PetscFunctionBegin; 1132 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1133 if (f) { 1134 ierr = (*f)(mat,data);CHKERRQ(ierr); 1135 } 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatCreateMPIDense" 1141 /*@C 1142 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1143 1144 Collective on MPI_Comm 1145 1146 Input Parameters: 1147 + comm - MPI communicator 1148 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1149 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1150 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1151 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1152 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1153 to control all matrix memory allocation. 1154 1155 Output Parameter: 1156 . A - the matrix 1157 1158 Notes: 1159 The dense format is fully compatible with standard Fortran 77 1160 storage by columns. 1161 1162 The data input variable is intended primarily for Fortran programmers 1163 who wish to allocate their own matrix memory space. Most users should 1164 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1165 1166 The user MUST specify either the local or global matrix dimensions 1167 (possibly both). 1168 1169 Level: intermediate 1170 1171 .keywords: matrix,dense, parallel 1172 1173 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1174 @*/ 1175 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A) 1176 { 1177 int ierr,size; 1178 1179 PetscFunctionBegin; 1180 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1182 if (size > 1) { 1183 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1184 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1185 } else { 1186 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1187 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatDuplicate_MPIDense" 1194 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1195 { 1196 Mat mat; 1197 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1198 int ierr; 1199 1200 PetscFunctionBegin; 1201 *newmat = 0; 1202 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr); 1203 ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr); 1204 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1205 mat->data = (void*)a; 1206 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1207 mat->factor = A->factor; 1208 mat->assembled = PETSC_TRUE; 1209 mat->preallocated = PETSC_TRUE; 1210 1211 a->rstart = oldmat->rstart; 1212 a->rend = oldmat->rend; 1213 a->size = oldmat->size; 1214 a->rank = oldmat->rank; 1215 mat->insertmode = NOT_SET_VALUES; 1216 a->nvec = oldmat->nvec; 1217 a->donotstash = oldmat->donotstash; 1218 ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1219 PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1220 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 1221 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1222 1223 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1224 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1225 PetscLogObjectParent(mat,a->A); 1226 *newmat = mat; 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #include "petscsys.h" 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1234 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat) 1235 { 1236 int *rowners,i,size,rank,m,ierr,nz,j; 1237 PetscScalar *array,*vals,*vals_ptr; 1238 MPI_Status status; 1239 1240 PetscFunctionBegin; 1241 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1242 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1243 1244 /* determine ownership of all rows */ 1245 m = M/size + ((M % size) > rank); 1246 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1247 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1248 rowners[0] = 0; 1249 for (i=2; i<=size; i++) { 1250 rowners[i] += rowners[i-1]; 1251 } 1252 1253 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1254 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1255 1256 if (!rank) { 1257 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1258 1259 /* read in my part of the matrix numerical values */ 1260 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1261 1262 /* insert into matrix-by row (this is why cannot directly read into array */ 1263 vals_ptr = vals; 1264 for (i=0; i<m; i++) { 1265 for (j=0; j<N; j++) { 1266 array[i + j*m] = *vals_ptr++; 1267 } 1268 } 1269 1270 /* read in other processors and ship out */ 1271 for (i=1; i<size; i++) { 1272 nz = (rowners[i+1] - rowners[i])*N; 1273 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1274 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1275 } 1276 } else { 1277 /* receive numeric values */ 1278 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1279 1280 /* receive message of values*/ 1281 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1282 1283 /* insert into matrix-by row (this is why cannot directly read into array */ 1284 vals_ptr = vals; 1285 for (i=0; i<m; i++) { 1286 for (j=0; j<N; j++) { 1287 array[i + j*m] = *vals_ptr++; 1288 } 1289 } 1290 } 1291 ierr = PetscFree(rowners);CHKERRQ(ierr); 1292 ierr = PetscFree(vals);CHKERRQ(ierr); 1293 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1294 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatLoad_MPIDense" 1300 int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1301 { 1302 Mat A; 1303 PetscScalar *vals,*svals; 1304 MPI_Comm comm = ((PetscObject)viewer)->comm; 1305 MPI_Status status; 1306 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1307 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1308 int tag = ((PetscObject)viewer)->tag; 1309 int i,nz,ierr,j,rstart,rend,fd; 1310 1311 PetscFunctionBegin; 1312 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1313 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1314 if (!rank) { 1315 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1316 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1317 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1318 } 1319 1320 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1321 M = header[1]; N = header[2]; nz = header[3]; 1322 1323 /* 1324 Handle case where matrix is stored on disk as a dense matrix 1325 */ 1326 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1327 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 /* determine ownership of all rows */ 1332 m = M/size + ((M % size) > rank); 1333 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1334 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1335 rowners[0] = 0; 1336 for (i=2; i<=size; i++) { 1337 rowners[i] += rowners[i-1]; 1338 } 1339 rstart = rowners[rank]; 1340 rend = rowners[rank+1]; 1341 1342 /* distribute row lengths to all processors */ 1343 ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr); 1344 offlens = ourlens + (rend-rstart); 1345 if (!rank) { 1346 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1347 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1348 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1349 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1350 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1351 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1352 } else { 1353 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1354 } 1355 1356 if (!rank) { 1357 /* calculate the number of nonzeros on each processor */ 1358 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1359 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1360 for (i=0; i<size; i++) { 1361 for (j=rowners[i]; j< rowners[i+1]; j++) { 1362 procsnz[i] += rowlengths[j]; 1363 } 1364 } 1365 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1366 1367 /* determine max buffer needed and allocate it */ 1368 maxnz = 0; 1369 for (i=0; i<size; i++) { 1370 maxnz = PetscMax(maxnz,procsnz[i]); 1371 } 1372 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1373 1374 /* read in my part of the matrix column indices */ 1375 nz = procsnz[0]; 1376 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1377 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1378 1379 /* read in every one elses and ship off */ 1380 for (i=1; i<size; i++) { 1381 nz = procsnz[i]; 1382 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1383 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1384 } 1385 ierr = PetscFree(cols);CHKERRQ(ierr); 1386 } else { 1387 /* determine buffer space needed for message */ 1388 nz = 0; 1389 for (i=0; i<m; i++) { 1390 nz += ourlens[i]; 1391 } 1392 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1393 1394 /* receive message of column indices*/ 1395 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1396 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1397 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1398 } 1399 1400 /* loop over local rows, determining number of off diagonal entries */ 1401 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1402 jj = 0; 1403 for (i=0; i<m; i++) { 1404 for (j=0; j<ourlens[i]; j++) { 1405 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1406 jj++; 1407 } 1408 } 1409 1410 /* create our matrix */ 1411 for (i=0; i<m; i++) { 1412 ourlens[i] -= offlens[i]; 1413 } 1414 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1415 A = *newmat; 1416 for (i=0; i<m; i++) { 1417 ourlens[i] += offlens[i]; 1418 } 1419 1420 if (!rank) { 1421 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1422 1423 /* read in my part of the matrix numerical values */ 1424 nz = procsnz[0]; 1425 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1426 1427 /* insert into matrix */ 1428 jj = rstart; 1429 smycols = mycols; 1430 svals = vals; 1431 for (i=0; i<m; i++) { 1432 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1433 smycols += ourlens[i]; 1434 svals += ourlens[i]; 1435 jj++; 1436 } 1437 1438 /* read in other processors and ship out */ 1439 for (i=1; i<size; i++) { 1440 nz = procsnz[i]; 1441 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1442 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1443 } 1444 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1445 } else { 1446 /* receive numeric values */ 1447 ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1448 1449 /* receive message of values*/ 1450 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1451 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1452 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1453 1454 /* insert into matrix */ 1455 jj = rstart; 1456 smycols = mycols; 1457 svals = vals; 1458 for (i=0; i<m; i++) { 1459 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1460 smycols += ourlens[i]; 1461 svals += ourlens[i]; 1462 jj++; 1463 } 1464 } 1465 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1466 ierr = PetscFree(vals);CHKERRQ(ierr); 1467 ierr = PetscFree(mycols);CHKERRQ(ierr); 1468 ierr = PetscFree(rowners);CHKERRQ(ierr); 1469 1470 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1471 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 1476 1477 1478 1479