1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpidense.c,v 1.109 1999/03/16 21:10:18 balay Exp balay $"; 3 #endif 4 5 /* 6 Basic functions for basic parallel dense matrices. 7 */ 8 9 #include "src/mat/impls/dense/mpi/mpidense.h" 10 #include "src/vec/vecimpl.h" 11 12 extern int MatSetUpMultiply_MPIDense(Mat); 13 14 #undef __FUNC__ 15 #define __FUNC__ "MatSetValues_MPIDense" 16 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv) 17 { 18 Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 19 int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 20 int roworiented = A->roworiented; 21 22 PetscFunctionBegin; 23 for ( i=0; i<m; i++ ) { 24 if (idxm[i] < 0) continue; 25 if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 26 if (idxm[i] >= rstart && idxm[i] < rend) { 27 row = idxm[i] - rstart; 28 if (roworiented) { 29 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 30 } else { 31 for ( j=0; j<n; j++ ) { 32 if (idxn[j] < 0) continue; 33 if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 34 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 35 } 36 } 37 } else { 38 if (roworiented) { 39 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n); CHKERRQ(ierr); 40 } else { 41 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 42 } 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNC__ 49 #define __FUNC__ "MatGetValues_MPIDense" 50 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 51 { 52 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 53 int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 54 55 PetscFunctionBegin; 56 for ( i=0; i<m; i++ ) { 57 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 58 if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 59 if (idxm[i] >= rstart && idxm[i] < rend) { 60 row = idxm[i] - rstart; 61 for ( j=0; j<n; j++ ) { 62 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 63 if (idxn[j] >= mdn->N) { 64 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 65 } 66 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 67 } 68 } else { 69 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 70 } 71 } 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNC__ 76 #define __FUNC__ "MatGetArray_MPIDense" 77 int MatGetArray_MPIDense(Mat A,Scalar **array) 78 { 79 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 80 int ierr; 81 82 PetscFunctionBegin; 83 ierr = MatGetArray(a->A,array); CHKERRQ(ierr); 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNC__ 88 #define __FUNC__ "MatRestoreArray_MPIDense" 89 int MatRestoreArray_MPIDense(Mat A,Scalar **array) 90 { 91 PetscFunctionBegin; 92 PetscFunctionReturn(0); 93 } 94 95 #undef __FUNC__ 96 #define __FUNC__ "MatAssemblyBegin_MPIDense" 97 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 98 { 99 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 100 MPI_Comm comm = mat->comm; 101 int ierr,nstash,reallocs; 102 InsertMode addv; 103 104 PetscFunctionBegin; 105 /* make sure all processors are either in INSERTMODE or ADDMODE */ 106 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 107 if (addv == (ADD_VALUES|INSERT_VALUES)) { 108 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs"); 109 } 110 mat->insertmode = addv; /* in case this processor had no cache */ 111 112 ierr = 
MatStashScatterBegin_Private(&mat->stash,mdn->rowners); CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* identify the consecutive values belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code below does not set the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A directly and not go through the MatZeroRows()
   routine.
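   (Presumably the problem is that the local matrix mdn->A stores all N
   columns of the owned rows, so MatZeroRows() on it places the diagonal
   value for local row i at global column i rather than at the true
   diagonal position, global column rstart+i.)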
181 */ 182 #undef __FUNC__ 183 #define __FUNC__ "MatZeroRows_MPIDense" 184 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 185 { 186 Mat_MPIDense *l = (Mat_MPIDense *) A->data; 187 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 188 int *procs,*nprocs,j,found,idx,nsends,*work; 189 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 190 int *rvalues,tag = A->tag,count,base,slen,n,*source; 191 int *lens,imdex,*lrows,*values; 192 MPI_Comm comm = A->comm; 193 MPI_Request *send_waits,*recv_waits; 194 MPI_Status recv_status,*send_status; 195 IS istmp; 196 197 PetscFunctionBegin; 198 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 199 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 200 201 /* first count number of contributors to each processor */ 202 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 203 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 204 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 205 for ( i=0; i<N; i++ ) { 206 idx = rows[i]; 207 found = 0; 208 for ( j=0; j<size; j++ ) { 209 if (idx >= owners[j] && idx < owners[j+1]) { 210 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 211 } 212 } 213 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 214 } 215 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 216 217 /* inform other processors of number of messages and max length*/ 218 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 219 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 220 nrecvs = work[rank]; 221 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 222 nmax = work[rank]; 223 PetscFree(work); 224 225 /* post receives: */ 226 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 227 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 228 for ( i=0; i<nrecvs; i++ ) { 229 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 230 } 231 232 /* do sends: 233 1) starts[i] gives the starting index in svalues for stuff going to 234 the ith processor 235 */ 236 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 237 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 238 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 239 starts[0] = 0; 240 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 241 for ( i=0; i<N; i++ ) { 242 svalues[starts[owner[i]]++] = rows[i]; 243 } 244 ISRestoreIndices(is,&rows); 245 246 starts[0] = 0; 247 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 248 count = 0; 249 for ( i=0; i<size; i++ ) { 250 if (procs[i]) { 251 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 252 } 253 } 254 PetscFree(starts); 255 256 base = owners[rank]; 257 258 /* wait on receives */ 259 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 260 source = lens + nrecvs; 261 count = nrecvs; slen = 0; 262 while (count) { 263 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 264 /* unpack receives into our local space */ 265 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 266 source[imdex] = recv_status.MPI_SOURCE; 267 lens[imdex] = n; 268 slen += n; 269 count--; 270 } 271 PetscFree(recv_waits); 272 273 /* move the data into the send scatter */ 274 lrows = (int *) PetscMalloc( 
(slen+1)*sizeof(int) ); CHKPTRQ(lrows); 275 count = 0; 276 for ( i=0; i<nrecvs; i++ ) { 277 values = rvalues + i*nmax; 278 for ( j=0; j<lens[i]; j++ ) { 279 lrows[count++] = values[j] - base; 280 } 281 } 282 PetscFree(rvalues); PetscFree(lens); 283 PetscFree(owner); PetscFree(nprocs); 284 285 /* actually zap the local rows */ 286 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 287 PLogObjectParent(A,istmp); 288 PetscFree(lrows); 289 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 290 ierr = ISDestroy(istmp); CHKERRQ(ierr); 291 292 /* wait on sends */ 293 if (nsends) { 294 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 295 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 296 PetscFree(send_status); 297 } 298 PetscFree(send_waits); PetscFree(svalues); 299 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNC__ 304 #define __FUNC__ "MatMult_MPIDense" 305 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 306 { 307 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 308 int ierr; 309 310 PetscFunctionBegin; 311 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 312 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 313 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNC__ 318 #define __FUNC__ "MatMultAdd_MPIDense" 319 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 320 { 321 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 322 int ierr; 323 324 PetscFunctionBegin; 325 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 326 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 327 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNC__ 332 #define __FUNC__ "MatMultTrans_MPIDense" 333 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 334 { 335 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 336 int ierr; 337 Scalar zero = 0.0; 338 339 PetscFunctionBegin; 340 ierr = VecSet(&zero,yy); CHKERRQ(ierr); 341 ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr); 342 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 343 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNC__ 348 #define __FUNC__ "MatMultTransAdd_MPIDense" 349 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 350 { 351 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 352 int ierr; 353 354 PetscFunctionBegin; 355 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 356 ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr); 357 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 358 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNC__ 363 #define __FUNC__ "MatGetDiagonal_MPIDense" 364 int MatGetDiagonal_MPIDense(Mat A,Vec v) 365 { 366 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 367 Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 368 int ierr, len, i, n, m = a->m, radd; 369 Scalar *x, zero = 0.0; 370 371 PetscFunctionBegin; 372 VecSet(&zero,v); 373 ierr = VecGetArray(v,&x); CHKERRQ(ierr); 374 ierr = VecGetSize(v,&n); CHKERRQ(ierr); 375 if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec"); 376 len = 
PetscMin(aloc->m,aloc->n); 377 radd = a->rstart*m; 378 for ( i=0; i<len; i++ ) { 379 x[i] = aloc->v[radd + i*m + i]; 380 } 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNC__ 385 #define __FUNC__ "MatDestroy_MPIDense" 386 int MatDestroy_MPIDense(Mat mat) 387 { 388 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 389 int ierr; 390 391 PetscFunctionBegin; 392 if (--mat->refct > 0) PetscFunctionReturn(0); 393 394 if (mat->mapping) { 395 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 396 } 397 if (mat->bmapping) { 398 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 399 } 400 #if defined(USE_PETSC_LOG) 401 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N); 402 #endif 403 ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr); 404 PetscFree(mdn->rowners); 405 ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 406 if (mdn->lvec) VecDestroy(mdn->lvec); 407 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 408 if (mdn->factor) { 409 if (mdn->factor->temp) PetscFree(mdn->factor->temp); 410 if (mdn->factor->tag) PetscFree(mdn->factor->tag); 411 if (mdn->factor->pivots) PetscFree(mdn->factor->pivots); 412 PetscFree(mdn->factor); 413 } 414 PetscFree(mdn); 415 if (mat->rmap) { 416 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 417 } 418 if (mat->cmap) { 419 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 420 } 421 PLogObjectDestroy(mat); 422 PetscHeaderDestroy(mat); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNC__ 427 #define __FUNC__ "MatView_MPIDense_Binary" 428 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 429 { 430 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 431 int ierr; 432 433 PetscFunctionBegin; 434 if (mdn->size == 1) { 435 ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 436 } 437 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNC__ 442 #define __FUNC__ "MatView_MPIDense_ASCII" 443 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 444 { 445 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 446 int ierr, format, size = mdn->size, rank = mdn->rank; 447 FILE *fd; 448 ViewerType vtype; 449 450 PetscFunctionBegin; 451 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 452 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 453 ierr = ViewerGetFormat(viewer,&format); 454 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 455 MatInfo info; 456 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 457 PetscSequentialPhaseBegin(mat->comm,1); 458 fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m, 459 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 460 fflush(fd); 461 PetscSequentialPhaseEnd(mat->comm,1); 462 ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 465 PetscFunctionReturn(0); 466 } 467 468 if (size == 1) { 469 ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 470 } else { 471 /* assemble the entire matrix onto first processor. */ 472 Mat A; 473 int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 474 Scalar *vals; 475 Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 476 477 if (!rank) { 478 ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr); 479 } else { 480 ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr); 481 } 482 PLogObjectParent(mat,A); 483 484 /* Copy the matrix ... 
This isn't the most efficient means, 485 but it's quick for now */ 486 row = mdn->rstart; m = Amdn->m; 487 for ( i=0; i<m; i++ ) { 488 ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 489 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 490 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 491 row++; 492 } 493 494 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 495 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 496 if (!rank) { 497 ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 498 } 499 ierr = MatDestroy(A); CHKERRQ(ierr); 500 } 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNC__ 505 #define __FUNC__ "MatView_MPIDense" 506 int MatView_MPIDense(Mat mat,Viewer viewer) 507 { 508 int ierr; 509 ViewerType vtype; 510 511 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 512 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 513 ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 514 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 515 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 516 } else { 517 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 518 } 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNC__ 523 #define __FUNC__ "MatGetInfo_MPIDense" 524 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 525 { 526 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 527 Mat mdn = mat->A; 528 int ierr; 529 double isend[5], irecv[5]; 530 531 PetscFunctionBegin; 532 info->rows_global = (double)mat->M; 533 info->columns_global = (double)mat->N; 534 info->rows_local = (double)mat->m; 535 info->columns_local = (double)mat->N; 536 info->block_size = 1.0; 537 ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr); 538 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 539 isend[3] = info->memory; isend[4] = info->mallocs; 540 if (flag == MAT_LOCAL) { 541 info->nz_used = isend[0]; 542 info->nz_allocated = isend[1]; 543 info->nz_unneeded = isend[2]; 544 info->memory = isend[3]; 545 info->mallocs = isend[4]; 546 } else if (flag == MAT_GLOBAL_MAX) { 547 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 548 info->nz_used = irecv[0]; 549 info->nz_allocated = irecv[1]; 550 info->nz_unneeded = irecv[2]; 551 info->memory = irecv[3]; 552 info->mallocs = irecv[4]; 553 } else if (flag == MAT_GLOBAL_SUM) { 554 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 555 info->nz_used = irecv[0]; 556 info->nz_allocated = irecv[1]; 557 info->nz_unneeded = irecv[2]; 558 info->memory = irecv[3]; 559 info->mallocs = irecv[4]; 560 } 561 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 562 info->fill_ratio_needed = 0; 563 info->factor_mallocs = 0; 564 PetscFunctionReturn(0); 565 } 566 567 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 568 extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 569 extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 570 extern int MatSolve_MPIDense(Mat,Vec,Vec); 571 extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 572 extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 573 extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 574 575 #undef __FUNC__ 576 #define __FUNC__ "MatSetOption_MPIDense" 577 int MatSetOption_MPIDense(Mat A,MatOption op) 578 { 579 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 580 581 PetscFunctionBegin; 582 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 583 op == MAT_YES_NEW_NONZERO_LOCATIONS || 584 op == MAT_NEW_NONZERO_LOCATION_ERR || 585 
op == MAT_NEW_NONZERO_ALLOCATION_ERR || 586 op == MAT_COLUMNS_SORTED || 587 op == MAT_COLUMNS_UNSORTED) { 588 MatSetOption(a->A,op); 589 } else if (op == MAT_ROW_ORIENTED) { 590 a->roworiented = 1; 591 MatSetOption(a->A,op); 592 } else if (op == MAT_ROWS_SORTED || 593 op == MAT_ROWS_UNSORTED || 594 op == MAT_SYMMETRIC || 595 op == MAT_STRUCTURALLY_SYMMETRIC || 596 op == MAT_YES_NEW_DIAGONALS || 597 op == MAT_USE_HASH_TABLE) { 598 PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 599 } else if (op == MAT_COLUMN_ORIENTED) { 600 a->roworiented = 0; MatSetOption(a->A,op); 601 } else if (op == MAT_NO_NEW_DIAGONALS) { 602 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 603 } else { 604 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 605 } 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNC__ 610 #define __FUNC__ "MatGetSize_MPIDense" 611 int MatGetSize_MPIDense(Mat A,int *m,int *n) 612 { 613 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 614 615 PetscFunctionBegin; 616 *m = mat->M; *n = mat->N; 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNC__ 621 #define __FUNC__ "MatGetLocalSize_MPIDense" 622 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 623 { 624 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 625 626 PetscFunctionBegin; 627 *m = mat->m; *n = mat->N; 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNC__ 632 #define __FUNC__ "MatGetOwnershipRange_MPIDense" 633 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 634 { 635 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 636 637 PetscFunctionBegin; 638 *m = mat->rstart; *n = mat->rend; 639 PetscFunctionReturn(0); 640 } 641 642 #undef __FUNC__ 643 #define __FUNC__ "MatGetRow_MPIDense" 644 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 645 { 646 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 647 int lrow, rstart = mat->rstart, rend = mat->rend,ierr; 648 649 PetscFunctionBegin; 650 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows") 651 lrow = row - rstart; 652 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNC__ 657 #define __FUNC__ "MatRestoreRow_MPIDense" 658 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 659 { 660 PetscFunctionBegin; 661 if (idx) PetscFree(*idx); 662 if (v) PetscFree(*v); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNC__ 667 #define __FUNC__ "MatNorm_MPIDense" 668 int MatNorm_MPIDense(Mat A,NormType type,double *norm) 669 { 670 Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 671 Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 672 int ierr, i, j; 673 double sum = 0.0; 674 Scalar *v = mat->v; 675 676 PetscFunctionBegin; 677 if (mdn->size == 1) { 678 ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 679 } else { 680 if (type == NORM_FROBENIUS) { 681 for (i=0; i<mat->n*mat->m; i++ ) { 682 #if defined(USE_PETSC_COMPLEX) 683 sum += PetscReal(PetscConj(*v)*(*v)); v++; 684 #else 685 sum += (*v)*(*v); v++; 686 #endif 687 } 688 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 689 *norm = sqrt(*norm); 690 PLogFlops(2*mat->n*mat->m); 691 } else if (type == NORM_1) { 692 double *tmp, *tmp2; 693 tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 694 tmp2 = tmp + mdn->N; 695 PetscMemzero(tmp,2*mdn->N*sizeof(double)); 696 *norm = 0.0; 697 v = mat->v; 698 for ( j=0; j<mat->n; j++ ) { 699 for ( i=0; i<mat->m; i++ ) { 700 tmp[j] += PetscAbsScalar(*v); v++; 701 } 702 } 703 ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 
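      /* tmp2 now holds the global absolute column sums; the 1-norm is their maximum */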
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc((m+1)*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep
       A's pointers for the bops and ops but copy everything
       else from B.
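       Copying B's header over A's (while restoring A's original ops and
       bops pointers afterwards) effectively transfers ownership of B's
       data structures to A before B's header is destroyed.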
764 */ 765 Abops = A->bops; 766 Aops = A->ops; 767 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 768 A->bops = Abops; 769 A->ops = Aops; 770 771 PetscHeaderDestroy(B); 772 } 773 PetscFunctionReturn(0); 774 } 775 776 #include "pinclude/blaslapack.h" 777 #undef __FUNC__ 778 #define __FUNC__ "MatScale_MPIDense" 779 int MatScale_MPIDense(Scalar *alpha,Mat inA) 780 { 781 Mat_MPIDense *A = (Mat_MPIDense *) inA->data; 782 Mat_SeqDense *a = (Mat_SeqDense *) A->A->data; 783 int one = 1, nz; 784 785 PetscFunctionBegin; 786 nz = a->m*a->n; 787 BLscal_( &nz, alpha, a->v, &one ); 788 PLogFlops(nz); 789 PetscFunctionReturn(0); 790 } 791 792 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 793 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **); 794 795 /* -------------------------------------------------------------------*/ 796 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 797 MatGetRow_MPIDense, 798 MatRestoreRow_MPIDense, 799 MatMult_MPIDense, 800 MatMultAdd_MPIDense, 801 MatMultTrans_MPIDense, 802 MatMultTransAdd_MPIDense, 803 0, 804 0, 805 0, 806 0, 807 0, 808 0, 809 0, 810 MatTranspose_MPIDense, 811 MatGetInfo_MPIDense,0, 812 MatGetDiagonal_MPIDense, 813 0, 814 MatNorm_MPIDense, 815 MatAssemblyBegin_MPIDense, 816 MatAssemblyEnd_MPIDense, 817 0, 818 MatSetOption_MPIDense, 819 MatZeroEntries_MPIDense, 820 MatZeroRows_MPIDense, 821 0, 822 0, 823 0, 824 0, 825 MatGetSize_MPIDense, 826 MatGetLocalSize_MPIDense, 827 MatGetOwnershipRange_MPIDense, 828 0, 829 0, 830 MatGetArray_MPIDense, 831 MatRestoreArray_MPIDense, 832 MatDuplicate_MPIDense, 833 0, 834 0, 835 0, 836 0, 837 0, 838 MatGetSubMatrices_MPIDense, 839 0, 840 MatGetValues_MPIDense, 841 0, 842 0, 843 MatScale_MPIDense, 844 0, 845 0, 846 0, 847 MatGetBlockSize_MPIDense, 848 0, 849 0, 850 0, 851 0, 852 0, 853 0, 854 0, 855 0, 856 0, 857 0, 858 0, 859 0, 860 MatGetMaps_Petsc}; 861 862 #undef __FUNC__ 863 #define __FUNC__ "MatCreateMPIDense" 864 /*@C 865 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 866 867 Collective on MPI_Comm 868 869 Input Parameters: 870 + comm - MPI communicator 871 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 872 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 873 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 874 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 875 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 876 to control all matrix memory allocation. 877 878 Output Parameter: 879 . A - the matrix 880 881 Notes: 882 The dense format is fully compatible with standard Fortran 77 883 storage by columns. 884 885 The data input variable is intended primarily for Fortran programmers 886 who wish to allocate their own matrix memory space. Most users should 887 set data=PETSC_NULL. 888 889 The user MUST specify either the local or global matrix dimensions 890 (possibly both). 891 892 Currently, the only parallel dense matrix decomposition is by rows, 893 so that n=N and each submatrix owns all of the global columns. 
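   A minimal usage sketch (the global sizes here are illustrative only):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               100,100,PETSC_NULL,&A); CHKERRQ(ierr);
      ... set entries with MatSetValues(), then ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);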
894 895 Level: intermediate 896 897 .keywords: matrix, dense, parallel 898 899 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 900 @*/ 901 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A) 902 { 903 Mat mat; 904 Mat_MPIDense *a; 905 int ierr, i,flg; 906 907 PetscFunctionBegin; 908 /* Note: For now, when data is specified above, this assumes the user correctly 909 allocates the local dense storage space. We should add error checking. */ 910 911 *A = 0; 912 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView); 913 PLogObjectCreate(mat); 914 mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 915 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 916 mat->ops->destroy = MatDestroy_MPIDense; 917 mat->ops->view = MatView_MPIDense; 918 mat->factor = 0; 919 mat->mapping = 0; 920 921 a->factor = 0; 922 mat->insertmode = NOT_SET_VALUES; 923 MPI_Comm_rank(comm,&a->rank); 924 MPI_Comm_size(comm,&a->size); 925 926 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 927 928 /* 929 The computation of n is wrong below, n should represent the number of local 930 rows in the right (column vector) 931 */ 932 933 /* each row stores all columns */ 934 if (N == PETSC_DECIDE) N = n; 935 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 936 /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */ 937 a->N = mat->N = N; 938 a->M = mat->M = M; 939 a->m = mat->m = m; 940 a->n = mat->n = n; 941 942 /* the information in the maps duplicates the information computed below, eventually 943 we should remove the duplicate information that is not contained in the maps */ 944 ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr); 945 ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr); 946 947 /* build local table of row and column ownerships */ 948 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 949 a->cowners = a->rowners + a->size + 1; 950 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 951 ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 952 a->rowners[0] = 0; 953 for ( i=2; i<=a->size; i++ ) { 954 a->rowners[i] += a->rowners[i-1]; 955 } 956 a->rstart = a->rowners[a->rank]; 957 a->rend = a->rowners[a->rank+1]; 958 ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 959 a->cowners[0] = 0; 960 for ( i=2; i<=a->size; i++ ) { 961 a->cowners[i] += a->cowners[i-1]; 962 } 963 964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 965 PLogObjectParent(mat,a->A); 966 967 /* build cache for off array entries formed */ 968 ierr = MatStashCreate_Private(comm,1,&mat->stash); CHKERRQ(ierr); 969 970 /* stuff used for matrix vector multiply */ 971 a->lvec = 0; 972 a->Mvctx = 0; 973 a->roworiented = 1; 974 975 *A = mat; 976 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 977 if (flg) { 978 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 979 } 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNC__ 984 #define __FUNC__ "MatDuplicate_MPIDense" 985 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 986 { 987 Mat mat; 988 Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 989 int ierr; 990 FactorCtx *factor; 991 992 PetscFunctionBegin; 993 *newmat = 0; 994 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView); 995 PLogObjectCreate(mat); 996 
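  /* fill in the new header just as MatCreateMPIDense() does: copy the function
     table, mirror the parallel layout of the old matrix, and duplicate its
     local dense block */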
mat->data         = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix row by row (the file stores rows contiguously while
       the local array is stored by columns, so we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (the file stores rows contiguously while
       the local array is stored by columns, so we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
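  /* at this point every process has copied its owned rows into the
     column-major local array obtained from MatGetArray() */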
PetscFree(rowners); 1095 PetscFree(vals); 1096 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1097 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 1102 #undef __FUNC__ 1103 #define __FUNC__ "MatLoad_MPIDense" 1104 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat) 1105 { 1106 Mat A; 1107 Scalar *vals,*svals; 1108 MPI_Comm comm = ((PetscObject)viewer)->comm; 1109 MPI_Status status; 1110 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1111 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1112 int tag = ((PetscObject)viewer)->tag; 1113 int i, nz, ierr, j,rstart, rend, fd; 1114 1115 PetscFunctionBegin; 1116 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1117 if (!rank) { 1118 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1119 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1120 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1121 } 1122 1123 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1124 M = header[1]; N = header[2]; nz = header[3]; 1125 1126 /* 1127 Handle case where matrix is stored on disk as a dense matrix 1128 */ 1129 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1130 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 /* determine ownership of all rows */ 1135 m = M/size + ((M % size) > rank); 1136 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1137 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1138 rowners[0] = 0; 1139 for ( i=2; i<=size; i++ ) { 1140 rowners[i] += rowners[i-1]; 1141 } 1142 rstart = rowners[rank]; 1143 rend = rowners[rank+1]; 1144 1145 /* distribute row lengths to all processors */ 1146 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1147 offlens = ourlens + (rend-rstart); 1148 if (!rank) { 1149 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1150 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1151 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1152 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1153 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1154 PetscFree(sndcounts); 1155 } else { 1156 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1157 } 1158 1159 if (!rank) { 1160 /* calculate the number of nonzeros on each processor */ 1161 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1162 PetscMemzero(procsnz,size*sizeof(int)); 1163 for ( i=0; i<size; i++ ) { 1164 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1165 procsnz[i] += rowlengths[j]; 1166 } 1167 } 1168 PetscFree(rowlengths); 1169 1170 /* determine max buffer needed and allocate it */ 1171 maxnz = 0; 1172 for ( i=0; i<size; i++ ) { 1173 maxnz = PetscMax(maxnz,procsnz[i]); 1174 } 1175 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1176 1177 /* read in my part of the matrix column indices */ 1178 nz = procsnz[0]; 1179 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1180 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1181 1182 /* read in every one elses and ship off */ 1183 for ( i=1; i<size; i++ ) { 1184 nz = procsnz[i]; 1185 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1186 
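      /* ship processor i's column indices; the matching MPI_Recv() is posted
         in the else branch below */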
ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1187 } 1188 PetscFree(cols); 1189 } else { 1190 /* determine buffer space needed for message */ 1191 nz = 0; 1192 for ( i=0; i<m; i++ ) { 1193 nz += ourlens[i]; 1194 } 1195 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1196 1197 /* receive message of column indices*/ 1198 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1199 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1200 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1201 } 1202 1203 /* loop over local rows, determining number of off diagonal entries */ 1204 PetscMemzero(offlens,m*sizeof(int)); 1205 jj = 0; 1206 for ( i=0; i<m; i++ ) { 1207 for ( j=0; j<ourlens[i]; j++ ) { 1208 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1209 jj++; 1210 } 1211 } 1212 1213 /* create our matrix */ 1214 for ( i=0; i<m; i++ ) { 1215 ourlens[i] -= offlens[i]; 1216 } 1217 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1218 A = *newmat; 1219 for ( i=0; i<m; i++ ) { 1220 ourlens[i] += offlens[i]; 1221 } 1222 1223 if (!rank) { 1224 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1225 1226 /* read in my part of the matrix numerical values */ 1227 nz = procsnz[0]; 1228 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1229 1230 /* insert into matrix */ 1231 jj = rstart; 1232 smycols = mycols; 1233 svals = vals; 1234 for ( i=0; i<m; i++ ) { 1235 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1236 smycols += ourlens[i]; 1237 svals += ourlens[i]; 1238 jj++; 1239 } 1240 1241 /* read in other processors and ship out */ 1242 for ( i=1; i<size; i++ ) { 1243 nz = procsnz[i]; 1244 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1245 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1246 } 1247 PetscFree(procsnz); 1248 } else { 1249 /* receive numeric values */ 1250 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1251 1252 /* receive message of values*/ 1253 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1254 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1255 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1256 1257 /* insert into matrix */ 1258 jj = rstart; 1259 smycols = mycols; 1260 svals = vals; 1261 for ( i=0; i<m; i++ ) { 1262 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1263 smycols += ourlens[i]; 1264 svals += ourlens[i]; 1265 jj++; 1266 } 1267 } 1268 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1269 1270 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1271 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 1276 1277 1278 1279
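
/*
   A minimal usage sketch of the routines in this file (illustrative only,
   not compiled as part of the library).  It creates a small parallel dense
   matrix with the default row distribution, fills the locally owned rows,
   assembles, and applies the matrix to a vector; the matrix sizes and the
   entry formula are assumptions made purely for the example.

      Mat    A;
      Vec    x, y;
      int    ierr, rstart, rend, i, j, M = 8, N = 8;
      Scalar v;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        for ( j=0; j<N; j++ ) {
          v    = 1.0/(i+j+1);
          ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
      ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,M,&y);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);

      ierr = VecDestroy(x);CHKERRQ(ierr);
      ierr = VecDestroy(y);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/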