#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.112 1999/04/19 22:11:55 bsmith Exp balay $";
#endif

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  PetscFunctionBegin;
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong; n should represent the local size
     of the right-hand (column) vector.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
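
/*
   Example (illustrative sketch only): typical creation, assembly, and cleanup
   of a parallel dense matrix with this routine.  The dimensions, index, and
   value below are made up for the example, and error checking with CHKERRQ()
   is omitted for brevity.

      Mat    A;
      int    row = 0, col = 0;
      Scalar one = 1.0;

      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);
      MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(A);

   Off-processor entries set with MatSetValues() are stashed and communicated
   during assembly, unless MAT_IGNORE_OFF_PROC_ENTRIES has been set.
*/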

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}