#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.116 1999/06/09 23:19:38 balay Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
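/*
   Usage sketch (illustrative, not part of the library source): the array
   returned by MatGetArray() for an MPIDENSE matrix is the column-major
   (Fortran-style) storage of the local m x N block, so local entry (i,j)
   lives at array[i + j*m].  Assuming a previously created and assembled
   MPIDENSE matrix mat:

       Scalar *array;
       int    i,j,m,N,ierr;
       ierr = MatGetLocalSize(mat,&m,&N);CHKERRQ(ierr);
       ierr = MatGetArray(mat,&array);CHKERRQ(ierr);
       for ( j=0; j<N; j++ ) {
         for ( i=0; i<m; i++ ) {
           array[i + j*m] = 0.0;
         }
       }
       ierr = MatRestoreArray(mat,&array);CHKERRQ(ierr);
*/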
#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
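/*
   Usage sketch (illustrative, not part of the library source): the assembly
   routines above implement the usual two-phase PETSc pattern.  Values set for
   rows owned by another processor are stashed locally and communicated during
   MatAssemblyBegin()/MatAssemblyEnd().  The indices below are hypothetical:

       int    row = 0, cols[2] = {0,1}, ierr;
       Scalar vals[2] = {1.0, 2.0};
       ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/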
/* 
    The code does not handle the diagonal entries correctly unless the 
    matrix is square and the column and row ownerships are identical.
    This is a BUG. The only way to fix it seems to be to access 
    mdn->A and mdn->B directly and not through the MatZeroRows() 
    routine. 
*/
#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);
  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
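/*
   Usage sketch (illustrative, not part of the library source): for y = A*x
   with the routines above, x has global length N and y is distributed like
   the rows of A; internally each multiply gathers all of x into the local
   work vector lvec and applies the local sequential dense block.  Assuming
   local row size m and global sizes M x N:

       Vec x,y;
       ierr = VecCreateMPI(comm,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
       ierr = VecCreateMPI(comm,m,M,&y);CHKERRQ(ierr);
       ierr = MatMult(mat,x,y);CHKERRQ(ierr);
*/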
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn  = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
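/*
   Usage sketch (illustrative, not part of the library source): a caller can
   request either the local statistics or a reduction over all processors,
   e.g. summing the stored entries; comm is the matrix communicator.

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       PetscPrintf(comm,"total entries stored %d\n",(int)info.nz_used);
*/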
/*
extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec);
*/

#undef __FUNC__  
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend, ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
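/*
   Usage sketch (illustrative, not part of the library source): because
   MatGetRow() above supports only locally owned rows, a traversal of the
   matrix is normally restricted to the local ownership range:

       int    row,rstart,rend,nz,*cols,ierr;
       Scalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) {
         ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... use the nz entries of this row ...
         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/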
#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a    = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free the data structures from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__  
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__  
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.
   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong; n should represent the number of
     local rows of a compatible right-hand (column) vector.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
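/*
   Usage sketch (illustrative, not part of the library source): typical creation
   and assembly of an MPI dense matrix, letting PETSc choose the row distribution
   and manage the storage.  The sizes and values are hypothetical.

       Mat    mat;
       int    row,col,rstart,rend,M = 10,N = 10,ierr;
       Scalar one = 1.0;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&mat);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) {
         for ( col=0; col<N; col++ ) {
           ierr = MatSetValues(mat,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(mat);CHKERRQ(ierr);
*/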
#undef __FUNC__  
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__  
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i, size, rank, m, ierr, nz, j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot directly read into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts for the other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
    /* insert into the matrix row by row (this is why we cannot directly read into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i, nz, ierr, j, rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts for the other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
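/*
   Usage sketch (illustrative, not part of the library source): loading an MPI
   dense matrix from an already-opened PETSc binary viewer; viewer is assumed
   to have been created by the caller.

       Mat mat;
       int ierr;
       ierr = MatLoad(viewer,MATMPIDENSE,&mat);CHKERRQ(ierr);
*/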