#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.90 1995/10/17 02:21:15 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}
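
/*
   A minimal usage sketch (not part of this file): each process inserts
   rows using global indices; values aimed at rows it does not own are
   stashed above and shipped during assembly.  Amat, Mglobal, and the
   tridiagonal values below are illustrative assumptions, not library code.
*/
#if 0
static int ExampleSetValues(Mat Amat,int Mglobal)
{
  int    ierr,i,rstart,rend,cols[3];
  Scalar vals[3];

  ierr = MatGetOwnershipRange(Amat,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {     /* interior rows of a 1-D Laplacian */
    if (i == 0 || i == Mglobal-1) continue;
    cols[0] = i-1;  cols[1] = i;   cols[2] = i+1;
    vals[0] = -1.0; vals[1] = 2.0; vals[2] = -1.0;
    ierr = MatSetValues(Amat,1,&i,3,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
  }
  return 0;
}
#endif
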
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:
     1) each message will consist of ordered pairs
        (global index,value); we store the global index as a double
        to simplify the message passing.
     2) since we don't know how long each individual message is we
        allocate the largest needed buffer for each receive. Potentially
        this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
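
/*
   The two MPI_Allreduce calls above implement a common pattern: procs[] is
   a 0/1 flag per destination, so a SUM reduction tells each process how
   many messages it will receive, and a MAX reduction over the per-process
   counts bounds the longest message it must be prepared to accept.  A
   stand-alone sketch of the same idea (the names here are illustrative,
   not library code):
*/
#if 0
static int ExampleCountMessages(MPI_Comm comm,int size,int rank,
                                int *flags,int *lens,int *nrecvs,int *maxlen)
{
  int *work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(flags,work,size,MPI_INT,MPI_SUM,comm); /* flags[j]=1 if we send to j */
  *nrecvs = work[rank];                                /* messages destined for us */
  MPI_Allreduce(lens,work,size,MPI_INT,MPI_MAX,comm);  /* lens[j] = length sent to j */
  *maxlen = work[rank];                                /* longest message we will get */
  PETSCFREE(work);
  return 0;
}
#endif
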
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled,shift = C->indexshift;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}
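
/*
   Typical call sequence (a sketch, not library code): values are set with
   MatSetValues and then both assembly phases are run before the matrix is
   used; independent local work may be done between Begin and End while the
   stash messages are in flight.
*/
#if 0
   ierr = MatSetValues(Amat,1,&row,ncols,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
   ierr = MatAssemblyBegin(Amat,FINAL_ASSEMBLY); CHKERRQ(ierr);
   /* ... local work can overlap the communication here ... */
   ierr = MatAssemblyEnd(Amat,FINAL_ASSEMBLY); CHKERRQ(ierr);
#endif
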
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives: */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}
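
/*
   A usage sketch (not part of this file): zero the rows listed in an index
   set and place one on the diagonal of each zeroed row, as is common when
   imposing Dirichlet boundary conditions.  Amat, nbd, and bdrows are
   assumed inputs.
*/
#if 0
static int ExampleZeroRows(Mat Amat,int nbd,int *bdrows)
{
  IS     is;
  int    ierr;
  Scalar one = 1.0;

  ierr = ISCreateSeq(MPI_COMM_SELF,nbd,bdrows,&is); CHKERRQ(ierr);
  ierr = MatZeroRows(Amat,is,&one); CHKERRQ(ierr);
  ierr = ISDestroy(is); CHKERRQ(ierr);
  return 0;
}
#endif
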
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but perhaps is not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but perhaps is not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}
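
/*
   A usage sketch (not library code): apply the parallel matrix to an MPI
   vector.  The routines above hide the ghost-value scatter; the local
   diagonal block is multiplied while the off-process entries of x are
   still in transit.  The sizes m and M are assumed inputs.
*/
#if 0
static int ExampleMult(Mat Amat,int m,int M)
{
  Vec x,y;
  int ierr;

  ierr = VecCreateMPI(MPI_COMM_WORLD,m,M,&x); CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y); CHKERRQ(ierr);
  /* ... fill x ... */
  ierr = MatMult(Amat,x,y); CHKERRQ(ierr);
  ierr = VecDestroy(x); CHKERRQ(ierr);
  ierr = VecDestroy(y); CHKERRQ(ierr);
  return 0;
}
#endif
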
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}

#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*) aij->A->data;
  int         ierr, format,shift = C->indexshift;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format); CHKERRQ(ierr);
    if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size, rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
   This has to provide several versions.

    1) per sequential
    2) a) use only local smoothing updating outer values only once.
       b) local smoothing updating outer values each inner iteration
    3) color updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D, where L is lower triangular,
       U is upper triangular, and D is diagonal; this routine applies

                (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;

    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);

    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
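
/*
   A usage sketch (not library code): one local SSOR sweep with a zero
   initial guess, as used when the matrix serves as a smoother or
   preconditioner.  Amat, b, and x are assumed inputs; the flags are
   OR-ed together as the tests above expect.
*/
#if 0
   ierr = MatRelax(Amat,b,1.0,
                   (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP|SOR_ZERO_INITIAL_GUESS),
                   0.0,1,x); CHKERRQ(ierr);
#endif
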
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
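
/*
   A usage sketch (not library code): report the total number of stored
   nonzeros across all processes and the per-process maximum.
*/
#if 0
   int nz,nzalloc,mem,nzmax;
   ierr = MatGetInfo(Amat,MAT_GLOBAL_SUM,&nz,&nzalloc,&mem); CHKERRQ(ierr);
   ierr = MatGetInfo(Amat,MAT_GLOBAL_MAX,&nzmax,&nzalloc,&mem); CHKERRQ(ierr);
#endif
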
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {if (idx) *idx = 0; if (v) *v = 0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v)   PETSCFREE(*v);
  return 0;
}
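
/*
   A usage sketch (not library code): visit each locally owned row and its
   global column indices.  Every MatGetRow must be paired with a
   MatRestoreRow, since the routines above allocate the merged arrays.
*/
#if 0
static int ExampleRowScan(Mat Amat)
{
  int    ierr,i,nz,*cols,rstart,rend;
  Scalar *vals;

  ierr = MatGetOwnershipRange(Amat,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {
    ierr = MatGetRow(Amat,i,&nz,&cols,&vals); CHKERRQ(ierr);
    /* ... examine the nz entries cols[j], vals[j] ... */
    ierr = MatRestoreRow(Amat,i,&nz,&cols,&vals); CHKERRQ(ierr);
  }
  return 0;
}
#endif
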
static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscZero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[cstart + *jj++ + shift] += abs(*v++);
#else
        tmp[cstart + *jj++ + shift] += fabs(*v++);
#endif
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[garray[*jj++ + shift]] += abs(*v++);
#else
        tmp[garray[*jj++ + shift]] += fabs(*v++);
#endif
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PETSCFREE(tmp); PETSCFREE(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double normtemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        if (sum > normtemp) normtemp = sum;
      }
      /* reduce once, after all local rows are processed; processors with
         different numbers of local rows must not reduce inside the loop */
      MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}
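
/*
   A usage sketch (not library code): the three norms supported above.
*/
#if 0
   double nrm1,nrmf,nrminf;
   ierr = MatNorm(Amat,NORM_1,&nrm1); CHKERRQ(ierr);          /* max column sum */
   ierr = MatNorm(Amat,NORM_FROBENIUS,&nrmf); CHKERRQ(ierr);
   ierr = MatNorm(Amat,NORM_INFINITY,&nrminf); CHKERRQ(ierr); /* max row sum */
#endif
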
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free the data structures from a */
    PETSCFREE(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PETSCFREE(a->colmap);
    if (a->garray) PETSCFREE(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterCtxDestroy(a->Mvctx);
    PETSCFREE(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PETSCHEADERDESTROY(B);
  }
  return 0;
}
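
/*
   A usage sketch (not library code): out-of-place transposition, and the
   (square matrices only) in-place variant selected by passing a null
   output argument, as the test at the top of the routine above shows.
*/
#if 0
   Mat At;
   ierr = MatTranspose(Amat,&At); CHKERRQ(ierr); /* builds a new matrix At */
   ierr = MatTranspose(Amat,0); CHKERRQ(ierr);   /* replaces Amat; square only */
#endif
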
extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
       (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
       or null (possibly different for each row). You must leave room
       for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
       submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
       submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices begin at
   one, not zero.  See the users manual for further details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz || o_nnz))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap = 0;
  a->garray = 0;

  /* stuff used for matrix vector multiply */
  a->lvec  = 0;
  a->Mvctx = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
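
/*
   A usage sketch (not library code): create a distributed matrix for a
   1-D problem with at most three nonzeros per row, of which at most one
   falls in the off-diagonal portion.  The global size 100 is illustrative.
*/
#if 0
   Mat Amat;
   ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                          3,0,1,0,&Amat); CHKERRQ(ierr);
   /* ... MatSetValues()/assembly as in the sketches above ... */
   ierr = MatDestroy(Amat); CHKERRQ(ierr);
#endif
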
SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1354 *newmat = 0; 1355 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1356 PLogObjectCreate(mat); 1357 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1358 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1359 mat->destroy = MatDestroy_MPIAIJ; 1360 mat->view = MatView_MPIAIJ; 1361 mat->factor = matin->factor; 1362 1363 a->m = oldmat->m; 1364 a->n = oldmat->n; 1365 a->M = oldmat->M; 1366 a->N = oldmat->N; 1367 1368 a->assembled = 1; 1369 a->rstart = oldmat->rstart; 1370 a->rend = oldmat->rend; 1371 a->cstart = oldmat->cstart; 1372 a->cend = oldmat->cend; 1373 a->size = oldmat->size; 1374 a->rank = oldmat->rank; 1375 a->insertmode = NOT_SET_VALUES; 1376 1377 a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1378 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1379 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1380 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1381 if (oldmat->colmap) { 1382 a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1383 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1384 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1385 } else a->colmap = 0; 1386 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1387 a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray); 1388 PLogObjectMemory(mat,len*sizeof(int)); 1389 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1390 } else a->garray = 0; 1391 1392 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1393 PLogObjectParent(mat,a->lvec); 1394 ierr = VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1395 PLogObjectParent(mat,a->Mvctx); 1396 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1397 PLogObjectParent(mat,a->A); 1398 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1399 PLogObjectParent(mat,a->B); 1400 *newmat = mat; 1401 return 0; 1402 } 1403 1404 #include "sysio.h" 1405 1406 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1407 { 1408 Mat A; 1409 int i, nz, ierr, j,rstart, rend, fd; 1410 Scalar *vals,*svals; 1411 PetscObject vobj = (PetscObject) bview; 1412 MPI_Comm comm = vobj->comm; 1413 MPI_Status status; 1414 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1415 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1416 int tag = ((PetscObject)bview)->tag; 1417 1418 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1419 if (!rank) { 1420 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1421 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1422 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1423 } 1424 1425 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1426 M = header[1]; N = header[2]; 1427 /* determine ownership of all rows */ 1428 m = M/size + ((M % size) > rank); 1429 rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners); 1430 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1431 rowners[0] = 0; 1432 for ( i=2; i<=size; i++ ) { 1433 rowners[i] += rowners[i-1]; 1434 } 1435 rstart = rowners[rank]; 1436 rend = rowners[rank+1]; 1437 1438 /* distribute row lengths to all processors */ 1439 ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1440 offlens = ourlens + (rend-rstart); 1441 if (!rank) { 1442 rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); 
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PETSCFREE(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscZero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PETSCFREE(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PETSCMAX(maxnz,procsnz[i]);
    }
    cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PETSCFREE(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscZero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PETSCFREE(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
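
/*
   A usage sketch (not library code): load a matrix from a binary viewer.
   The viewer-creation call shown is an assumption about the surrounding
   library and may differ; only the MatLoad_MPIAIJorMPIRow call reflects
   the routine defined above.
*/
#if 0
   Viewer bview;
   Mat    Amat;
   ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&bview);
   CHKERRQ(ierr);
   ierr = MatLoad_MPIAIJorMPIRow(bview,MATMPIAIJ,&Amat); CHKERRQ(ierr);
   ierr = ViewerDestroy(bview); CHKERRQ(ierr);
#endif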