1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/ 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" 8 #include "src/vec/vecimpl.h" 9 10 EXTERN_C_BEGIN 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 14 { 15 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 16 int m = A->m,rstart = mdn->rstart,ierr; 17 PetscScalar *array; 18 MPI_Comm comm; 19 20 PetscFunctionBegin; 21 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 22 23 /* The reuse aspect is not implemented efficiently */ 24 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 25 26 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 27 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 28 ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr); 29 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 30 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 32 33 *iscopy = PETSC_TRUE; 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN int MatSetUpMultiply_MPIDense(Mat); 39 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatSetValues_MPIDense" 42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv) 43 { 44 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 45 int ierr,i,j,rstart = A->rstart,rend = A->rend,row; 46 PetscTruth roworiented = A->roworiented; 47 48 PetscFunctionBegin; 49 for (i=0; i<m; i++) { 50 if (idxm[i] < 0) continue; 51 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 52 if (idxm[i] >= rstart && idxm[i] < rend) { 53 row = idxm[i] - rstart; 54 if (roworiented) { 55 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 56 } else { 57 for (j=0; j<n; j++) { 58 if (idxn[j] < 0) continue; 59 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 60 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 61 } 62 } 63 } else { 64 if (!A->donotstash) { 65 if (roworiented) { 66 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 67 } else { 68 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 69 } 70 } 71 } 72 } 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetValues_MPIDense" 78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 79 { 80 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 81 int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row; 82 83 PetscFunctionBegin; 84 for (i=0; i<m; i++) { 85 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 86 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 87 if (idxm[i] >= rstart && idxm[i] < rend) { 88 row = idxm[i] - rstart; 89 for (j=0; j<n; j++) { 90 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 91 if (idxn[j] >= mat->N) { 92 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 93 } 94 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 95 } 96 } else { 97 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 98 } 99 } 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatGetArray_MPIDense" 105 int MatGetArray_MPIDense(Mat A,PetscScalar **array) 106 { 107 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 108 int ierr; 109 110 PetscFunctionBegin; 111 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 118 { 119 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 120 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 121 int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 122 PetscScalar *av,*bv,*v = lmat->v; 123 Mat newmat; 124 125 PetscFunctionBegin; 126 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 127 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 128 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 129 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 130 131 /* No parallel redistribution currently supported! Should really check each index set 132 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 133 original matrix! */ 134 135 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 137 138 /* Check submatrix call */ 139 if (scall == MAT_REUSE_MATRIX) { 140 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 141 /* Really need to test rows and column sizes! */ 142 newmat = *B; 143 } else { 144 /* Create and fill new matrix */ 145 ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr); 146 } 147 148 /* Now extract the data pointers and do the copy, column at a time */ 149 newmatd = (Mat_MPIDense*)newmat->data; 150 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 151 152 for (i=0; i<ncols; i++) { 153 av = v + nlrows*icol[i]; 154 for (j=0; j<nrows; j++) { 155 *bv++ = av[irow[j] - rstart]; 156 } 157 } 158 159 /* Assemble the matrices so that the correct flags are set */ 160 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 161 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 162 163 /* Free work space */ 164 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 165 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 166 *B = newmat; 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatRestoreArray_MPIDense" 172 int MatRestoreArray_MPIDense(Mat A,PetscScalar **array) 173 { 174 PetscFunctionBegin; 175 PetscFunctionReturn(0); 176 } 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 181 { 182 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 183 MPI_Comm comm = mat->comm; 184 int ierr,nstash,reallocs; 185 InsertMode addv; 186 187 PetscFunctionBegin; 188 /* make sure all processors are either in INSERTMODE or ADDMODE */ 189 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 190 if (addv == (ADD_VALUES|INSERT_VALUES)) { 191 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 192 } 193 mat->insertmode = addv; /* in case this processor had no cache */ 194 195 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 196 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 197 PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 198 PetscFunctionReturn(0); 199 } 200 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 204 { 205 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 206 int i,n,ierr,*row,*col,flg,j,rstart,ncols; 207 PetscScalar *val; 208 InsertMode addv=mat->insertmode; 209 210 PetscFunctionBegin; 211 /* wait on receives */ 212 while (1) { 213 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 214 if (!flg) break; 215 216 for (i=0; i<n;) { 217 /* Now identify the consecutive vals belonging to the same row */ 218 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 219 if (j < n) ncols = j-i; 220 else ncols = n-i; 221 /* Now assemble all these values with a single function call */ 222 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 223 i = j; 224 } 225 } 226 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 227 228 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 229 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 230 231 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 232 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatZeroEntries_MPIDense" 239 int MatZeroEntries_MPIDense(Mat A) 240 { 241 int ierr; 242 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 243 244 PetscFunctionBegin; 245 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatGetBlockSize_MPIDense" 251 int MatGetBlockSize_MPIDense(Mat A,int *bs) 252 { 253 PetscFunctionBegin; 254 *bs = 1; 255 PetscFunctionReturn(0); 256 } 257 258 /* the code does not do the diagonal entries correctly unless the 259 matrix is square and the column and row owerships are identical. 260 This is a BUG. The only way to fix it seems to be to access 261 mdn->A and mdn->B directly and not through the MatZeroRows() 262 routine. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatZeroRows_MPIDense" 266 int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag) 267 { 268 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 269 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 270 int *nprocs,j,idx,nsends; 271 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 272 int *rvalues,tag = A->tag,count,base,slen,n,*source; 273 int *lens,imdex,*lrows,*values; 274 MPI_Comm comm = A->comm; 275 MPI_Request *send_waits,*recv_waits; 276 MPI_Status recv_status,*send_status; 277 IS istmp; 278 PetscTruth found; 279 280 PetscFunctionBegin; 281 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 283 284 /* first count number of contributors to each processor */ 285 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 286 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 287 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 288 for (i=0; i<N; i++) { 289 idx = rows[i]; 290 found = PETSC_FALSE; 291 for (j=0; j<size; j++) { 292 if (idx >= owners[j] && idx < owners[j+1]) { 293 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 294 } 295 } 296 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 297 } 298 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 299 300 /* inform other processors of number of messages and max length*/ 301 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 302 303 /* post receives: */ 304 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 305 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 306 for (i=0; i<nrecvs; i++) { 307 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 308 } 309 310 /* do sends: 311 1) starts[i] gives the starting index in svalues for stuff going to 312 the ith processor 313 */ 314 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 315 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 316 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 317 starts[0] = 0; 318 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 319 for (i=0; i<N; i++) { 320 svalues[starts[owner[i]]++] = rows[i]; 321 } 322 ISRestoreIndices(is,&rows); 323 324 starts[0] = 0; 325 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 326 count = 0; 327 for (i=0; i<size; i++) { 328 if (nprocs[2*i+1]) { 329 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscFree(starts);CHKERRQ(ierr); 333 334 base = owners[rank]; 335 336 /* wait on receives */ 337 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 338 source = lens + nrecvs; 339 count = nrecvs; slen = 0; 340 while (count) { 341 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 342 /* unpack receives into our local space */ 343 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 344 source[imdex] = recv_status.MPI_SOURCE; 345 lens[imdex] = n; 346 slen += n; 347 count--; 348 } 349 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 350 351 /* move the data into the send scatter */ 352 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 353 count = 0; 354 for (i=0; i<nrecvs; i++) { 355 values = rvalues + i*nmax; 356 for (j=0; j<lens[i]; j++) { 357 lrows[count++] = values[j] - base; 358 } 359 } 360 ierr = PetscFree(rvalues);CHKERRQ(ierr); 361 ierr = PetscFree(lens);CHKERRQ(ierr); 362 ierr = PetscFree(owner);CHKERRQ(ierr); 363 ierr = PetscFree(nprocs);CHKERRQ(ierr); 364 365 /* actually zap the local rows */ 366 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 367 PetscLogObjectParent(A,istmp); 368 ierr = PetscFree(lrows);CHKERRQ(ierr); 369 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 370 ierr = ISDestroy(istmp);CHKERRQ(ierr); 371 372 /* wait on sends */ 373 if (nsends) { 374 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 375 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 376 ierr = PetscFree(send_status);CHKERRQ(ierr); 377 } 378 ierr = PetscFree(send_waits);CHKERRQ(ierr); 379 ierr = PetscFree(svalues);CHKERRQ(ierr); 380 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatMult_MPIDense" 386 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 387 { 388 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 389 int ierr; 390 391 PetscFunctionBegin; 392 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 393 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 394 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 #undef __FUNCT__ 399 #define __FUNCT__ "MatMultAdd_MPIDense" 400 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 401 { 402 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 403 int ierr; 404 405 PetscFunctionBegin; 406 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 407 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 408 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatMultTranspose_MPIDense" 414 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 415 { 416 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 417 int ierr; 418 PetscScalar zero = 0.0; 419 420 PetscFunctionBegin; 421 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 422 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 423 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 424 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 430 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 431 { 432 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 433 int ierr; 434 435 PetscFunctionBegin; 436 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 437 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 438 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 439 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatGetDiagonal_MPIDense" 445 int MatGetDiagonal_MPIDense(Mat A,Vec v) 446 { 447 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 448 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 449 int ierr,len,i,n,m = A->m,radd; 450 PetscScalar *x,zero = 0.0; 451 452 PetscFunctionBegin; 453 ierr = VecSet(&zero,v);CHKERRQ(ierr); 454 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 455 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 456 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 457 len = PetscMin(a->A->m,a->A->n); 458 radd = a->rstart*m; 459 for (i=0; i<len; i++) { 460 x[i] = aloc->v[radd + i*m + i]; 461 } 462 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatDestroy_MPIDense" 468 int MatDestroy_MPIDense(Mat mat) 469 { 470 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 471 int ierr; 472 473 PetscFunctionBegin; 474 475 #if defined(PETSC_USE_LOG) 476 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 477 #endif 478 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 479 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 480 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 481 if (mdn->lvec) VecDestroy(mdn->lvec); 482 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 483 if (mdn->factor) { 484 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 485 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 486 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 487 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 488 } 489 ierr = PetscFree(mdn);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "MatView_MPIDense_Binary" 495 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 496 { 497 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 498 int ierr; 499 500 PetscFunctionBegin; 501 if (mdn->size == 1) { 502 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 503 } 504 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 510 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 511 { 512 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 513 int ierr,size = mdn->size,rank = mdn->rank; 514 PetscViewerType vtype; 515 PetscTruth isascii,isdraw; 516 PetscViewer sviewer; 517 PetscViewerFormat format; 518 519 PetscFunctionBegin; 520 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 521 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 522 if (isascii) { 523 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 524 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 525 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 526 MatInfo info; 527 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 528 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m, 529 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 530 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 531 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } else if (format == PETSC_VIEWER_ASCII_INFO) { 534 PetscFunctionReturn(0); 535 } 536 } else if (isdraw) { 537 PetscDraw draw; 538 PetscTruth isnull; 539 540 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 541 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 542 if (isnull) PetscFunctionReturn(0); 543 } 544 545 if (size == 1) { 546 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 547 } else { 548 /* assemble the entire matrix onto first processor. */ 549 Mat A; 550 int M = mat->M,N = mat->N,m,row,i,nz,*cols; 551 PetscScalar *vals; 552 553 if (!rank) { 554 ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 555 } else { 556 ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 557 } 558 PetscLogObjectParent(mat,A); 559 560 /* Copy the matrix ... This isn't the most efficient means, 561 but it's quick for now */ 562 row = mdn->rstart; m = mdn->A->m; 563 for (i=0; i<m; i++) { 564 ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 565 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 566 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 567 row++; 568 } 569 570 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 571 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 572 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 573 if (!rank) { 574 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 575 } 576 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 577 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 578 ierr = MatDestroy(A);CHKERRQ(ierr); 579 } 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatView_MPIDense" 585 int MatView_MPIDense(Mat mat,PetscViewer viewer) 586 { 587 int ierr; 588 PetscTruth isascii,isbinary,isdraw,issocket; 589 590 PetscFunctionBegin; 591 592 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 596 597 if (isascii || issocket || isdraw) { 598 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 599 } else if (isbinary) { 600 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 601 } else { 602 SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatGetInfo_MPIDense" 609 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 610 { 611 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 612 Mat mdn = mat->A; 613 int ierr; 614 PetscReal isend[5],irecv[5]; 615 616 PetscFunctionBegin; 617 info->rows_global = (double)A->M; 618 info->columns_global = (double)A->N; 619 info->rows_local = (double)A->m; 620 info->columns_local = (double)A->N; 621 info->block_size = 1.0; 622 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 623 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 624 isend[3] = info->memory; isend[4] = info->mallocs; 625 if (flag == MAT_LOCAL) { 626 info->nz_used = isend[0]; 627 info->nz_allocated = isend[1]; 628 info->nz_unneeded = isend[2]; 629 info->memory = isend[3]; 630 info->mallocs = isend[4]; 631 } else if (flag == MAT_GLOBAL_MAX) { 632 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 633 info->nz_used = irecv[0]; 634 info->nz_allocated = irecv[1]; 635 info->nz_unneeded = irecv[2]; 636 info->memory = irecv[3]; 637 info->mallocs = irecv[4]; 638 } else if (flag == MAT_GLOBAL_SUM) { 639 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 640 info->nz_used = irecv[0]; 641 info->nz_allocated = irecv[1]; 642 info->nz_unneeded = irecv[2]; 643 info->memory = irecv[3]; 644 info->mallocs = irecv[4]; 645 } 646 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 647 info->fill_ratio_needed = 0; 648 info->factor_mallocs = 0; 649 PetscFunctionReturn(0); 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatSetOption_MPIDense" 654 int MatSetOption_MPIDense(Mat A,MatOption op) 655 { 656 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 657 int ierr; 658 659 PetscFunctionBegin; 660 switch (op) { 661 case MAT_NO_NEW_NONZERO_LOCATIONS: 662 case MAT_YES_NEW_NONZERO_LOCATIONS: 663 case MAT_NEW_NONZERO_LOCATION_ERR: 664 case MAT_NEW_NONZERO_ALLOCATION_ERR: 665 case MAT_COLUMNS_SORTED: 666 case MAT_COLUMNS_UNSORTED: 667 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 668 break; 669 case MAT_ROW_ORIENTED: 670 a->roworiented = PETSC_TRUE; 671 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 672 break; 673 case MAT_ROWS_SORTED: 674 case MAT_ROWS_UNSORTED: 675 case MAT_YES_NEW_DIAGONALS: 676 case MAT_USE_HASH_TABLE: 677 PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 678 break; 679 case MAT_COLUMN_ORIENTED: 680 a->roworiented = PETSC_FALSE; 681 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 682 break; 683 case MAT_IGNORE_OFF_PROC_ENTRIES: 684 a->donotstash = PETSC_TRUE; 685 break; 686 case MAT_NO_NEW_DIAGONALS: 687 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 688 default: 689 SETERRQ(PETSC_ERR_SUP,"unknown option"); 690 } 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetRow_MPIDense" 696 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v) 697 { 698 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 699 int lrow,rstart = mat->rstart,rend = mat->rend,ierr; 700 701 PetscFunctionBegin; 702 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 703 lrow = row - rstart; 704 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatRestoreRow_MPIDense" 710 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 711 { 712 int ierr; 713 714 PetscFunctionBegin; 715 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 716 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatDiagonalScale_MPIDense" 722 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 723 { 724 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 725 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 726 PetscScalar *l,*r,x,*v; 727 int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 728 729 PetscFunctionBegin; 730 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 731 if (ll) { 732 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 733 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size"); 734 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 735 for (i=0; i<m; i++) { 736 x = l[i]; 737 v = mat->v + i; 738 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 739 } 740 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 741 PetscLogFlops(n*m); 742 } 743 if (rr) { 744 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 745 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size"); 746 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 747 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 748 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 749 for (i=0; i<n; i++) { 750 x = r[i]; 751 v = mat->v + i*m; 752 for (j=0; j<m; j++) { (*v++) *= x;} 753 } 754 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 755 PetscLogFlops(n*m); 756 } 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatNorm_MPIDense" 762 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 763 { 764 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 765 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 766 int ierr,i,j; 767 PetscReal sum = 0.0; 768 PetscScalar *v = mat->v; 769 770 PetscFunctionBegin; 771 if (mdn->size == 1) { 772 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 773 } else { 774 if (type == NORM_FROBENIUS) { 775 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 776 #if defined(PETSC_USE_COMPLEX) 777 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 778 #else 779 sum += (*v)*(*v); v++; 780 #endif 781 } 782 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 783 *nrm = sqrt(*nrm); 784 PetscLogFlops(2*mdn->A->n*mdn->A->m); 785 } else if (type == NORM_1) { 786 PetscReal *tmp,*tmp2; 787 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 788 tmp2 = tmp + A->N; 789 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 790 *nrm = 0.0; 791 v = mat->v; 792 for (j=0; j<mdn->A->n; j++) { 793 for (i=0; i<mdn->A->m; i++) { 794 tmp[j] += PetscAbsScalar(*v); v++; 795 } 796 } 797 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 798 for (j=0; j<A->N; j++) { 799 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 800 } 801 ierr = PetscFree(tmp);CHKERRQ(ierr); 802 PetscLogFlops(A->n*A->m); 803 } else if (type == NORM_INFINITY) { /* max row norm */ 804 PetscReal ntemp; 805 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 806 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 807 } else { 808 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 809 } 810 } 811 PetscFunctionReturn(0); 812 } 813 814 #undef __FUNCT__ 815 #define __FUNCT__ "MatTranspose_MPIDense" 816 int MatTranspose_MPIDense(Mat A,Mat *matout) 817 { 818 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 819 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 820 Mat B; 821 int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 822 int j,i,ierr; 823 PetscScalar *v; 824 825 PetscFunctionBegin; 826 if (!matout && M != N) { 827 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 828 } 829 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 830 831 m = a->A->m; n = a->A->n; v = Aloc->v; 832 ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr); 833 for (j=0; j<n; j++) { 834 for (i=0; i<m; i++) rwork[i] = rstart + i; 835 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 836 v += m; 837 } 838 ierr = PetscFree(rwork);CHKERRQ(ierr); 839 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 840 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 841 if (matout) { 842 *matout = B; 843 } else { 844 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 845 } 846 PetscFunctionReturn(0); 847 } 848 849 #include "petscblaslapack.h" 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatScale_MPIDense" 852 int MatScale_MPIDense(PetscScalar *alpha,Mat inA) 853 { 854 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 855 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 856 int one = 1,nz; 857 858 PetscFunctionBegin; 859 nz = inA->m*inA->N; 860 BLscal_(&nz,alpha,a->v,&one); 861 PetscLogFlops(nz); 862 PetscFunctionReturn(0); 863 } 864 865 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 866 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **); 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 870 int MatSetUpPreallocation_MPIDense(Mat A) 871 { 872 int ierr; 873 874 PetscFunctionBegin; 875 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 /* -------------------------------------------------------------------*/ 880 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 881 MatGetRow_MPIDense, 882 MatRestoreRow_MPIDense, 883 MatMult_MPIDense, 884 MatMultAdd_MPIDense, 885 MatMultTranspose_MPIDense, 886 MatMultTransposeAdd_MPIDense, 887 0, 888 0, 889 0, 890 0, 891 0, 892 0, 893 0, 894 MatTranspose_MPIDense, 895 MatGetInfo_MPIDense,0, 896 MatGetDiagonal_MPIDense, 897 MatDiagonalScale_MPIDense, 898 MatNorm_MPIDense, 899 MatAssemblyBegin_MPIDense, 900 MatAssemblyEnd_MPIDense, 901 0, 902 MatSetOption_MPIDense, 903 MatZeroEntries_MPIDense, 904 MatZeroRows_MPIDense, 905 0, 906 0, 907 0, 908 0, 909 MatSetUpPreallocation_MPIDense, 910 0, 911 0, 912 MatGetArray_MPIDense, 913 MatRestoreArray_MPIDense, 914 MatDuplicate_MPIDense, 915 0, 916 0, 917 0, 918 0, 919 0, 920 MatGetSubMatrices_MPIDense, 921 0, 922 MatGetValues_MPIDense, 923 0, 924 0, 925 MatScale_MPIDense, 926 0, 927 0, 928 0, 929 MatGetBlockSize_MPIDense, 930 0, 931 0, 932 0, 933 0, 934 0, 935 0, 936 0, 937 0, 938 0, 939 MatGetSubMatrix_MPIDense, 940 MatDestroy_MPIDense, 941 MatView_MPIDense, 942 MatGetPetscMaps_Petsc}; 943 944 EXTERN_C_BEGIN 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 947 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 948 { 949 Mat_MPIDense *a; 950 int ierr; 951 PetscTruth flg2; 952 953 PetscFunctionBegin; 954 mat->preallocated = PETSC_TRUE; 955 /* Note: For now, when data is specified above, this assumes the user correctly 956 allocates the local dense storage space. We should add error checking. */ 957 958 a = (Mat_MPIDense*)mat->data; 959 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr); 960 PetscLogObjectParent(mat,a->A); 961 PetscFunctionReturn(0); 962 } 963 EXTERN_C_END 964 965 EXTERN_C_BEGIN 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatCreate_MPIDense" 968 int MatCreate_MPIDense(Mat mat) 969 { 970 Mat_MPIDense *a; 971 int ierr,i; 972 973 PetscFunctionBegin; 974 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 975 mat->data = (void*)a; 976 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 977 mat->factor = 0; 978 mat->mapping = 0; 979 980 a->factor = 0; 981 mat->insertmode = NOT_SET_VALUES; 982 ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr); 983 ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr); 984 985 ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr); 986 ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr); 987 a->nvec = mat->n; 988 989 /* the information in the maps duplicates the information computed below, eventually 990 we should remove the duplicate information that is not contained in the maps */ 991 ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr); 992 ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr); 993 994 /* build local table of row and column ownerships */ 995 ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 996 a->cowners = a->rowners + a->size + 1; 997 PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 998 ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 999 a->rowners[0] = 0; 1000 for (i=2; i<=a->size; i++) { 1001 a->rowners[i] += a->rowners[i-1]; 1002 } 1003 a->rstart = a->rowners[a->rank]; 1004 a->rend = a->rowners[a->rank+1]; 1005 ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 1006 a->cowners[0] = 0; 1007 for (i=2; i<=a->size; i++) { 1008 a->cowners[i] += a->cowners[i-1]; 1009 } 1010 1011 /* build cache for off array entries formed */ 1012 a->donotstash = PETSC_FALSE; 1013 ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr); 1014 1015 /* stuff used for matrix vector multiply */ 1016 a->lvec = 0; 1017 a->Mvctx = 0; 1018 a->roworiented = PETSC_TRUE; 1019 1020 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1021 "MatGetDiagonalBlock_MPIDense", 1022 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1024 "MatMPIDenseSetPreallocation_MPIDense", 1025 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 EXTERN_C_END 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1032 /*@C 1033 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1034 1035 Not collective 1036 1037 Input Parameters: 1038 . A - the matrix 1039 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1040 to control all matrix memory allocation. 1041 1042 Notes: 1043 The dense format is fully compatible with standard Fortran 77 1044 storage by columns. 1045 1046 The data input variable is intended primarily for Fortran programmers 1047 who wish to allocate their own matrix memory space. Most users should 1048 set data=PETSC_NULL. 1049 1050 Level: intermediate 1051 1052 .keywords: matrix,dense, parallel 1053 1054 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1055 @*/ 1056 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1057 { 1058 int ierr,(*f)(Mat,PetscScalar *); 1059 1060 PetscFunctionBegin; 1061 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIAdjSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1062 if (f) { 1063 ierr = (*f)(mat,data);CHKERRQ(ierr); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatCreateMPIDense" 1070 /*@C 1071 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1072 1073 Collective on MPI_Comm 1074 1075 Input Parameters: 1076 + comm - MPI communicator 1077 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1078 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1079 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1080 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1081 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1082 to control all matrix memory allocation. 1083 1084 Output Parameter: 1085 . A - the matrix 1086 1087 Notes: 1088 The dense format is fully compatible with standard Fortran 77 1089 storage by columns. 1090 1091 The data input variable is intended primarily for Fortran programmers 1092 who wish to allocate their own matrix memory space. Most users should 1093 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1094 1095 The user MUST specify either the local or global matrix dimensions 1096 (possibly both). 1097 1098 Level: intermediate 1099 1100 .keywords: matrix,dense, parallel 1101 1102 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1103 @*/ 1104 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A) 1105 { 1106 int ierr,size; 1107 1108 PetscFunctionBegin; 1109 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1111 if (size > 1) { 1112 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1113 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1114 } else { 1115 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1116 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatDuplicate_MPIDense" 1123 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1124 { 1125 Mat mat; 1126 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1127 int ierr; 1128 1129 PetscFunctionBegin; 1130 *newmat = 0; 1131 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr); 1132 ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr); 1133 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1134 mat->data = (void*)a; 1135 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1136 mat->factor = A->factor; 1137 mat->assembled = PETSC_TRUE; 1138 mat->preallocated = PETSC_TRUE; 1139 1140 a->rstart = oldmat->rstart; 1141 a->rend = oldmat->rend; 1142 a->size = oldmat->size; 1143 a->rank = oldmat->rank; 1144 mat->insertmode = NOT_SET_VALUES; 1145 a->nvec = oldmat->nvec; 1146 a->donotstash = oldmat->donotstash; 1147 ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1148 PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1149 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 1150 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1151 1152 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1153 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1154 PetscLogObjectParent(mat,a->A); 1155 *newmat = mat; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #include "petscsys.h" 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1163 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat) 1164 { 1165 int *rowners,i,size,rank,m,ierr,nz,j; 1166 PetscScalar *array,*vals,*vals_ptr; 1167 MPI_Status status; 1168 1169 PetscFunctionBegin; 1170 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1171 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1172 1173 /* determine ownership of all rows */ 1174 m = M/size + ((M % size) > rank); 1175 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1176 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1177 rowners[0] = 0; 1178 for (i=2; i<=size; i++) { 1179 rowners[i] += rowners[i-1]; 1180 } 1181 1182 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1183 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1184 1185 if (!rank) { 1186 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1187 1188 /* read in my part of the matrix numerical values */ 1189 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1190 1191 /* insert into matrix-by row (this is why cannot directly read into array */ 1192 vals_ptr = vals; 1193 for (i=0; i<m; i++) { 1194 for (j=0; j<N; j++) { 1195 array[i + j*m] = *vals_ptr++; 1196 } 1197 } 1198 1199 /* read in other processors and ship out */ 1200 for (i=1; i<size; i++) { 1201 nz = (rowners[i+1] - rowners[i])*N; 1202 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1203 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1204 } 1205 } else { 1206 /* receive numeric values */ 1207 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1208 1209 /* receive message of values*/ 1210 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1211 1212 /* insert into matrix-by row (this is why cannot directly read into array */ 1213 vals_ptr = vals; 1214 for (i=0; i<m; i++) { 1215 for (j=0; j<N; j++) { 1216 array[i + j*m] = *vals_ptr++; 1217 } 1218 } 1219 } 1220 ierr = PetscFree(rowners);CHKERRQ(ierr); 1221 ierr = PetscFree(vals);CHKERRQ(ierr); 1222 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1223 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 EXTERN_C_BEGIN 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatLoad_MPIDense" 1230 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat) 1231 { 1232 Mat A; 1233 PetscScalar *vals,*svals; 1234 MPI_Comm comm = ((PetscObject)viewer)->comm; 1235 MPI_Status status; 1236 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1237 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1238 int tag = ((PetscObject)viewer)->tag; 1239 int i,nz,ierr,j,rstart,rend,fd; 1240 1241 PetscFunctionBegin; 1242 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1243 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1244 if (!rank) { 1245 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1246 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1247 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1248 } 1249 1250 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1251 M = header[1]; N = header[2]; nz = header[3]; 1252 1253 /* 1254 Handle case where matrix is stored on disk as a dense matrix 1255 */ 1256 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1257 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 /* determine ownership of all rows */ 1262 m = M/size + ((M % size) > rank); 1263 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1264 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1265 rowners[0] = 0; 1266 for (i=2; i<=size; i++) { 1267 rowners[i] += rowners[i-1]; 1268 } 1269 rstart = rowners[rank]; 1270 rend = rowners[rank+1]; 1271 1272 /* distribute row lengths to all processors */ 1273 ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr); 1274 offlens = ourlens + (rend-rstart); 1275 if (!rank) { 1276 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1277 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1278 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1279 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1280 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1281 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1282 } else { 1283 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1284 } 1285 1286 if (!rank) { 1287 /* calculate the number of nonzeros on each processor */ 1288 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1289 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1290 for (i=0; i<size; i++) { 1291 for (j=rowners[i]; j< rowners[i+1]; j++) { 1292 procsnz[i] += rowlengths[j]; 1293 } 1294 } 1295 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1296 1297 /* determine max buffer needed and allocate it */ 1298 maxnz = 0; 1299 for (i=0; i<size; i++) { 1300 maxnz = PetscMax(maxnz,procsnz[i]); 1301 } 1302 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1303 1304 /* read in my part of the matrix column indices */ 1305 nz = procsnz[0]; 1306 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1307 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1308 1309 /* read in every one elses and ship off */ 1310 for (i=1; i<size; i++) { 1311 nz = procsnz[i]; 1312 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1313 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1314 } 1315 ierr = PetscFree(cols);CHKERRQ(ierr); 1316 } else { 1317 /* determine buffer space needed for message */ 1318 nz = 0; 1319 for (i=0; i<m; i++) { 1320 nz += ourlens[i]; 1321 } 1322 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1323 1324 /* receive message of column indices*/ 1325 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1326 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1327 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1328 } 1329 1330 /* loop over local rows, determining number of off diagonal entries */ 1331 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1332 jj = 0; 1333 for (i=0; i<m; i++) { 1334 for (j=0; j<ourlens[i]; j++) { 1335 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1336 jj++; 1337 } 1338 } 1339 1340 /* create our matrix */ 1341 for (i=0; i<m; i++) { 1342 ourlens[i] -= offlens[i]; 1343 } 1344 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1345 A = *newmat; 1346 for (i=0; i<m; i++) { 1347 ourlens[i] += offlens[i]; 1348 } 1349 1350 if (!rank) { 1351 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1352 1353 /* read in my part of the matrix numerical values */ 1354 nz = procsnz[0]; 1355 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1356 1357 /* insert into matrix */ 1358 jj = rstart; 1359 smycols = mycols; 1360 svals = vals; 1361 for (i=0; i<m; i++) { 1362 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1363 smycols += ourlens[i]; 1364 svals += ourlens[i]; 1365 jj++; 1366 } 1367 1368 /* read in other processors and ship out */ 1369 for (i=1; i<size; i++) { 1370 nz = procsnz[i]; 1371 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1372 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1373 } 1374 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1375 } else { 1376 /* receive numeric values */ 1377 ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1378 1379 /* receive message of values*/ 1380 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1381 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1382 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1383 1384 /* insert into matrix */ 1385 jj = rstart; 1386 smycols = mycols; 1387 svals = vals; 1388 for (i=0; i<m; i++) { 1389 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1390 smycols += ourlens[i]; 1391 svals += ourlens[i]; 1392 jj++; 1393 } 1394 } 1395 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1396 ierr = PetscFree(vals);CHKERRQ(ierr); 1397 ierr = PetscFree(mycols);CHKERRQ(ierr); 1398 ierr = PetscFree(rowners);CHKERRQ(ierr); 1399 1400 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1401 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 EXTERN_C_END 1405 1406 1407 1408 1409 1410