/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
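
/*
   Usage sketch (hypothetical caller code, assumes a standard PETSc 2.x build):
   insert a 2x2 block with MatSetValues().  With the default row-oriented option
   the values array is read row by row (a00,a01,a10,a11); after
   MatSetOption(mat,MAT_COLUMN_ORIENTED) it is read column by column.

       int         rows[2] = {0,1},cols[2] = {0,1},ierr;
       PetscScalar v[4]    = {1.0,2.0,3.0,4.0};

       ierr = MatSetValues(mat,2,rows,2,cols,v,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/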
#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
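
/*
   Note on assembly (descriptive summary of the routines below): values destined
   for rows owned by another process are accumulated in mat->stash by
   MatSetValues_MPIDense(); MatAssemblyBegin_MPIDense() starts the stash scatter,
   and MatAssemblyEnd_MPIDense() receives the stashed values and inserts them
   locally with MatSetValues_MPIDense() before assembling the local SeqDense block.
*/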
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
/*
   The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *nprocs,j,idx,nsends;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the received row indices into the local rows array */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len = PetscMin(a->A->m,a->A->n);
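  /*
     The local block is stored column major with leading dimension m, so the
     global diagonal entry (rstart+i,rstart+i) sits at local offset
     i + (rstart+i)*m = radd + i*m + i.
  */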
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* rwork holds the m global row indices of the local block */
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}
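
/*
   Typical call (sketch of caller code): scale every entry of the matrix in place.

       PetscScalar alpha = 2.0;
       ierr = MatScale(&alpha,A);CHKERRQ(ierr);
*/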
static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1000 "MatGetDiagonalBlock_MPIDense", 1001 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 EXTERN_C_END 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1008 /*@C 1009 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1010 1011 Not collective 1012 1013 Input Parameters: 1014 . A - the matrix 1015 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1016 to control all matrix memory allocation. 1017 1018 Notes: 1019 The dense format is fully compatible with standard Fortran 77 1020 storage by columns. 1021 1022 The data input variable is intended primarily for Fortran programmers 1023 who wish to allocate their own matrix memory space. Most users should 1024 set data=PETSC_NULL. 1025 1026 Level: intermediate 1027 1028 .keywords: matrix,dense, parallel 1029 1030 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1031 @*/ 1032 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1033 { 1034 Mat_MPIDense *a; 1035 int ierr; 1036 PetscTruth flg2; 1037 1038 PetscFunctionBegin; 1039 ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr); 1040 if (!flg2) PetscFunctionReturn(0); 1041 mat->preallocated = PETSC_TRUE; 1042 /* Note: For now, when data is specified above, this assumes the user correctly 1043 allocates the local dense storage space. We should add error checking. */ 1044 1045 a = (Mat_MPIDense*)mat->data; 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr); 1047 PetscLogObjectParent(mat,a->A); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatCreateMPIDense" 1053 /*@C 1054 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1055 1056 Collective on MPI_Comm 1057 1058 Input Parameters: 1059 + comm - MPI communicator 1060 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1061 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1062 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1063 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1064 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1065 to control all matrix memory allocation. 1066 1067 Output Parameter: 1068 . A - the matrix 1069 1070 Notes: 1071 The dense format is fully compatible with standard Fortran 77 1072 storage by columns. 1073 1074 The data input variable is intended primarily for Fortran programmers 1075 who wish to allocate their own matrix memory space. Most users should 1076 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1077 1078 The user MUST specify either the local or global matrix dimensions 1079 (possibly both). 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
   for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)viewer)->tag;
  int         i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
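
/*
   Overall usage sketch for this matrix type (hypothetical driver code, not part
   of the implementation above; assumes M and N are the global dimensions and a
   standard PETSc 2.x build): create a parallel dense matrix, fill the locally
   owned rows, assemble, and apply it to a vector.

       Mat         A;
       Vec         x,y;
       int         ierr,i,j,rstart,rend;
       PetscScalar v;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (i=rstart; i<rend; i++) {
         for (j=0; j<N; j++) {
           v    = 1.0/(i+j+1);
           ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
       ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,M,&y);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);
       ierr = MatDestroy(A);CHKERRQ(ierr);
       ierr = VecDestroy(x);CHKERRQ(ierr);
       ierr = VecDestroy(y);CHKERRQ(ierr);
*/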