/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *nprocs,j,idx,nsends;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* rwork holds the m global row indices of this process, so allocate m entries */
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR
   for Fortran users) for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
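
   A typical calling sequence, sketched here for illustration only (the
   global sizes M and N, the row/column indices, and the value inserted
   are placeholders, and error checking is abbreviated):

      Mat A;
      ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);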

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix - by row (this is why we cannot directly read into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix - by row (this is why we cannot directly read into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
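
/*
   Illustrative usage sketch for the MPIDense routines above.  The
   communicator comm, the global sizes M and N, and the entries set below
   are placeholders chosen for illustration, and error checking is
   abbreviated:

      Mat         A;
      Vec         x,y;
      int         ierr,row,col,rstart,rend;
      PetscScalar one = 1.0;

      ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        col  = row % N;
        ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

      ierr = VecCreateMPI(comm,PETSC_DECIDE,N,&x);
      ierr = VecCreateMPI(comm,PETSC_DECIDE,M,&y);
      ierr = VecSet(&one,x);
      ierr = MatMult(A,x,y);

      ierr = VecDestroy(x);
      ierr = VecDestroy(y);
      ierr = MatDestroy(A);
*/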