#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.1 1995/10/19 22:14:22 curfman Exp curfman $";
#endif

#include "mpidense.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
                                 int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  mdn->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatSetValues_MPIDense:Column too large");
        ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
        CHKERRQ(ierr);
      }
    }
    else {
      ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
    "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:
     1) each message consists of ordered triples
        (global row index, global column index, value), where the two
        indices are stored as Scalars to simplify the message passing.
     2) since we don't know how long each individual message is we
        allocate the largest needed buffer for each receive. Potentially
        this is a lot of wasted space.

     This could be done better.
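
     As an illustration of the packing: an entry destined for global row 3,
     column 7 with value 4.2 travels as the Scalar triple (3.0, 7.0, 4.2);
     MatAssemblyEnd_MPIDense converts the two indices back to ints when it
     unpacks each message.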
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PETSCFREE(mdn->recv_waits); PETSCFREE(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(mdn->send_waits); PETSCFREE(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  mdn->assembled = 1;
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

/* this code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG.  The only way to fix it seems to be to access the
   local submatrix (l->A) directly rather than going through the
   MatZeroRows() routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives: */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
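    /* when count reaches zero a message has arrived from every sender;
       lens[i] holds the length of message i for the unpack loop below */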
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}

static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (!mdn->assembled)
    SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (!mdn->assembled)
    SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
  ierr = VecScatterBegin(xx,mdn->lvec,ADD_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,ADD_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

/*
   This only works correctly for square matrices where the subblock A->A
   is the diagonal block.
*/
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIDense:Must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PETSCFREE(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec) VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterCtxDestroy(mdn->Mvctx);
  PETSCFREE(mdn);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  PetscObject  vobj = (PetscObject) viewer;
  FILE         *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format); CHKERRQ(ierr);
    if (format == FILE_FORMAT_INFO) {
      ; /* do nothing for now */
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  PetscObject  vobj = (PetscObject) viewer;
  int          ierr;

  if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:Must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat matin,MatInfoType flag,int *nz,
                               int *nzalloc,int *mem)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  Mat          A = mat->A;
  int          ierr, isend[3], irecv[3];

  ierr = MatGetInfo(A,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
  return 0;
}

static int MatGetSize_MPIDense(Mat matin,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat matin,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat matin,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:Only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v) PETSCFREE(*v);
  return 0;
}

static int MatCopyPrivate_MPIDense(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       0, 0,
       0, 0,
       0, 0, 0,
       0, 0, 0,
       MatGetInfo_MPIDense, 0,
       MatGetDiagonal_MPIDense, 0, 0,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,
       0, 0, 0, 0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0, 0,
       0, 0, 0, 0, 0, MatCopyPrivate_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
       if n is given)

   Output Parameter:
.  newmat - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
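
   Example (a minimal sketch, assuming a square 10x10 matrix whose rows
   PETSc distributes automatically; error checking omitted):
$      Mat A;
$      MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,&A);
$      /* set entries with MatSetValues(), then assemble */
$      MatAssemblyBegin(A,FINAL_ASSEMBLY);
$      MatAssemblyEnd(A,FINAL_ASSEMBLY);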

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;

  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) n = N;
  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported");
  a->N = N;
  a->M = M;
  a->m = m;
  a->n = n;

  /* build local table of row ownerships */
  a->rowners = (int *) PETSCMALLOC((a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopyPrivate_MPIDense(Mat matin,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) matin->data;
  int          ierr;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = matin->factor;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sysio.h"

int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
{
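  /*
     Layout of the binary file, as implied by the reads below:
        header[4]  - MAT_COOKIE, M (global rows), N (global columns),
                     plus one more header entry that this routine ignores
        M ints     - number of nonzeros in each row
        nz ints    - column indices, row by row
        nz Scalars - numerical values, row by row
  */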
  Mat          A;
  int          i, nz, ierr, j, rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:Not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PETSCFREE(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscZero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PETSCFREE(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PETSCMAX(maxnz,procsnz[i]);
    }
    cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PETSCFREE(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscZero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIDENSE) {
    ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat); CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PETSCFREE(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with file");

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
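
/*
   A minimal usage sketch for loading a matrix with the routine above.
   This is hypothetical driver code: the viewer-creation call and its
   flag are assumptions and are not defined in this file.

       Viewer bview;
       Mat    A;
       int    ierr;
       ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&bview);
       ierr = MatLoad_MPIDense(bview,MATMPIDENSE,&A); CHKERRQ(ierr);
       ...
       ierr = MatDestroy(A); CHKERRQ(ierr);
       ierr = ViewerDestroy(bview); CHKERRQ(ierr);
*/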