#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.117 1999/06/30 23:50:57 balay Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
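/*
   Note on value ordering (explanatory comment, added for clarity): for the m-by-n
   logical block passed to MatSetValues_MPIDense(), a row-oriented caller supplies
   entry (idxm[i],idxn[j]) at v[i*n+j], while a column-oriented caller supplies it
   at v[i+j*m]; the two branches above (and the two stash routines) index v
   accordingly.
*/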
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*) mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* now identify the consecutive values belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
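  /*
     Overview of the communication below (explanatory comment, added for clarity):
     each processor determines which processor owns each of the rows it was asked
     to zero, exchanges message counts with MPI_Allreduce(), ships the row indices
     to the owning processors with nonblocking sends/receives, and finally each
     owner zeros its local rows through MatZeroRows() on its sequential matrix.
  */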
  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the received row indices into the local list of rows to zero */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
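/*
   Note (explanatory comment, added for clarity): the local dense block is stored
   column-major with leading dimension a->m, so in MatGetDiagonal_MPIDense below the
   diagonal entry of local row i sits in global column a->rstart+i, i.e. at
   aloc->v[a->rstart*m + i*m + i].  As with MatZeroRows above, this is only correct
   when the matrix is square and the row and column ownerships line up.
*/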
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn  = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend, ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows")
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
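/*
   Note (explanatory comment, added for clarity): the parallel norms below are
   computed from local contributions followed by a reduction. NORM_FROBENIUS sums
   the squares of the local entries and takes sqrt() of the MPI_SUM; NORM_1 forms
   local column sums, MPI_SUMs them, and takes the largest column sum; NORM_INFINITY
   takes the MPI_MAX of the local row-wise infinity norms (each process owns full rows).
*/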
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a    = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but we free the data structures from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}
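/*
   Note (added for clarity): MatScale_MPIDense below scales the contiguous local
   column-major array in place with a single BLAS-1 scal call over all nz = m*n
   local entries; BLscal_ is assumed to be the precision-dependent BLAS scal wrapper
   declared in pinclude/blaslapack.h.
*/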
#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.
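
   Example usage (a minimal sketch added for illustration; error checking and the
   MatSetValues() calls that fill the matrix are omitted):
$     Mat A;
$     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
$     /* ... MatSetValues() calls ... */
$     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
$     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
$     ierr = MatDestroy(A);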

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong: n should represent the local size of a
     right-hand (column) vector, i.e. the number of locally owned entries of the
     vector the matrix is applied to, not simply a share of N.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"
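/*
   Note (explanatory comment, added for clarity): the loader below handles a binary
   file in which the matrix is stored as a dense array of values, row by row.
   Process 0 reads each processor's block of rows and ships it with MPI_Send();
   every process then copies its block into the column-major local array, which is
   why the values cannot be read directly into the array returned by MatGetArray().
*/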
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}