1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.94 1995/10/23 18:37:47 curfman Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscZero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 40 int *idxn,Scalar *v,InsertMode addv) 41 { 42 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 43 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (idxm[i] >= rstart && idxm[i] < rend) { 56 row = idxm[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (idxn[j] >= cstart && idxn[j] < cend){ 61 col = idxn[j] - cstart; 62 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 63 } 64 else { 65 if (aij->assembled) { 66 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 67 col = aij->colmap[idxn[j]] + shift; 68 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 69 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 70 col = idxn[j]; 71 } 72 } 73 else col = idxn[j]; 74 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 75 } 76 } 77 } 78 else { 79 ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr); 80 } 81 } 82 return 0; 83 } 84 85 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 86 { 87 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 88 MPI_Comm comm = mat->comm; 89 int size = aij->size, *owners = aij->rowners; 90 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 91 MPI_Request *send_waits,*recv_waits; 92 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 93 InsertMode addv; 94 Scalar *rvalues,*svalues; 95 96 /* make sure all processors are either in INSERTMODE or ADDMODE */ 97 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 98 if (addv == (ADD_VALUES|INSERT_VALUES)) { 99 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 100 } 101 aij->insertmode = addv; /* in case this processor had no cache */ 102 103 /* first count number of contributors to each processor */ 104 nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 105 PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 106 owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 107 for ( i=0; i<aij->stash.n; i++ ) { 108 idx = aij->stash.idx[i]; 109 for ( j=0; j<size; j++ ) { 110 if (idx >= owners[j] && idx < owners[j+1]) { 111 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 112 } 113 } 114 } 115 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 116 117 /* inform other processors of number of messages and max length*/ 118 work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 119 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 120 nreceives = work[rank]; 121 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 122 nmax = work[rank]; 123 PETSCFREE(work); 124 125 /* post receives: 126 1) each message will consist of ordered pairs 127 (global index,value) we store the global index as a double 128 to simplify the message passing. 129 2) since we don't know how long each individual message is we 130 allocate the largest needed buffer for each receive. Potentially 131 this is a lot of wasted space. 132 133 134 This could be done better. 135 */ 136 rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 137 CHKPTRQ(rvalues); 138 recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 139 CHKPTRQ(recv_waits); 140 for ( i=0; i<nreceives; i++ ) { 141 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 142 comm,recv_waits+i); 143 } 144 145 /* do sends: 146 1) starts[i] gives the starting index in svalues for stuff going to 147 the ith processor 148 */ 149 svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 150 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 151 CHKPTRQ(send_waits); 152 starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts); 153 starts[0] = 0; 154 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 155 for ( i=0; i<aij->stash.n; i++ ) { 156 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 157 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 158 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 159 } 160 PETSCFREE(owner); 161 starts[0] = 0; 162 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 163 count = 0; 164 for ( i=0; i<size; i++ ) { 165 if (procs[i]) { 166 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 167 comm,send_waits+count++); 168 } 169 } 170 PETSCFREE(starts); PETSCFREE(nprocs); 171 172 /* Free cache space */ 173 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 174 175 aij->svalues = svalues; aij->rvalues = rvalues; 176 aij->nsends = nsends; aij->nrecvs = nreceives; 177 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 178 aij->rmax = nmax; 179 180 return 0; 181 } 182 extern int MatSetUpMultiply_MPIAIJ(Mat); 183 184 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 187 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 188 MPI_Status *send_status,recv_status; 189 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 190 int row,col,other_disassembled,shift = C->indexshift; 191 Scalar *values,val; 192 InsertMode addv = aij->insertmode; 193 194 /* wait on receives */ 195 while (count) { 196 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 197 /* unpack receives into our local space */ 198 values = aij->rvalues + 3*imdex*aij->rmax; 199 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 200 n = n/3; 201 for ( i=0; i<n; i++ ) { 202 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 203 col = (int) PETSCREAL(values[3*i+1]); 204 val = values[3*i+2]; 205 if (col >= aij->cstart && col < aij->cend) { 206 col -= aij->cstart; 207 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 208 } 209 else { 210 if (aij->assembled) { 211 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 212 col = aij->colmap[col] + shift; 213 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 214 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 215 col = (int) PETSCREAL(values[3*i+1]); 216 } 217 } 218 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 219 } 220 } 221 count--; 222 } 223 PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 224 225 /* wait on sends */ 226 if (aij->nsends) { 227 send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status)); 228 CHKPTRQ(send_status); 229 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 230 PETSCFREE(send_status); 231 } 232 PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 233 234 aij->insertmode = NOT_SET_VALUES; 235 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 236 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 237 238 /* determine if any processor has disassembled, if so we must 239 also disassemble ourselfs, in order that we may reassemble. */ 240 MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 241 if (aij->assembled && !other_disassembled) { 242 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 243 } 244 245 if (!aij->assembled && mode == FINAL_ASSEMBLY) { 246 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 247 aij->assembled = 1; 248 } 249 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 250 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 251 252 return 0; 253 } 254 255 static int MatZeroEntries_MPIAIJ(Mat A) 256 { 257 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 258 int ierr; 259 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 260 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 261 return 0; 262 } 263 264 /* the code does not do the diagonal entries correctly unless the 265 matrix is square and the column and row owerships are identical. 266 This is a BUG. The only way to fix it seems to be to access 267 aij->A and aij->B directly and not through the MatZeroRows() 268 routine. 269 */ 270 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 271 { 272 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 273 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 274 int *procs,*nprocs,j,found,idx,nsends,*work; 275 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 276 int *rvalues,tag = A->tag,count,base,slen,n,*source; 277 int *lens,imdex,*lrows,*values; 278 MPI_Comm comm = A->comm; 279 MPI_Request *send_waits,*recv_waits; 280 MPI_Status recv_status,*send_status; 281 IS istmp; 282 283 if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 284 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 285 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 286 287 /* first count number of contributors to each processor */ 288 nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 289 PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 290 owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 291 for ( i=0; i<N; i++ ) { 292 idx = rows[i]; 293 found = 0; 294 for ( j=0; j<size; j++ ) { 295 if (idx >= owners[j] && idx < owners[j+1]) { 296 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 297 } 298 } 299 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 300 } 301 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 302 303 /* inform other processors of number of messages and max length*/ 304 work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 305 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 306 nrecvs = work[rank]; 307 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 308 nmax = work[rank]; 309 PETSCFREE(work); 310 311 /* post receives: */ 312 rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 313 CHKPTRQ(rvalues); 314 recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 315 CHKPTRQ(recv_waits); 316 for ( i=0; i<nrecvs; i++ ) { 317 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 318 } 319 320 /* do sends: 321 1) starts[i] gives the starting index in svalues for stuff going to 322 the ith processor 323 */ 324 svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 325 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 326 CHKPTRQ(send_waits); 327 starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts); 328 starts[0] = 0; 329 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 330 for ( i=0; i<N; i++ ) { 331 svalues[starts[owner[i]]++] = rows[i]; 332 } 333 ISRestoreIndices(is,&rows); 334 335 starts[0] = 0; 336 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 337 count = 0; 338 for ( i=0; i<size; i++ ) { 339 if (procs[i]) { 340 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 341 } 342 } 343 PETSCFREE(starts); 344 345 base = owners[rank]; 346 347 /* wait on receives */ 348 lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 349 source = lens + nrecvs; 350 count = nrecvs; slen = 0; 351 while (count) { 352 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 353 /* unpack receives into our local space */ 354 MPI_Get_count(&recv_status,MPI_INT,&n); 355 source[imdex] = recv_status.MPI_SOURCE; 356 lens[imdex] = n; 357 slen += n; 358 count--; 359 } 360 PETSCFREE(recv_waits); 361 362 /* move the data into the send scatter */ 363 lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 364 count = 0; 365 for ( i=0; i<nrecvs; i++ ) { 366 values = rvalues + i*nmax; 367 for ( j=0; j<lens[i]; j++ ) { 368 lrows[count++] = values[j] - base; 369 } 370 } 371 PETSCFREE(rvalues); PETSCFREE(lens); 372 PETSCFREE(owner); PETSCFREE(nprocs); 373 374 /* actually zap the local rows */ 375 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 376 PLogObjectParent(A,istmp); 377 PETSCFREE(lrows); 378 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 379 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 380 ierr = ISDestroy(istmp); CHKERRQ(ierr); 381 382 /* wait on sends */ 383 if (nsends) { 384 send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status)); 385 CHKPTRQ(send_status); 386 MPI_Waitall(nsends,send_waits,send_status); 387 PETSCFREE(send_status); 388 } 389 PETSCFREE(send_waits); PETSCFREE(svalues); 390 391 return 0; 392 } 393 394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 395 { 396 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 397 int ierr; 398 399 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 400 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 401 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 402 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 403 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 404 return 0; 405 } 406 407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 408 { 409 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 410 int ierr; 411 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 412 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 413 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 414 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 415 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 416 return 0; 417 } 418 419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 420 { 421 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 422 int ierr; 423 424 if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 425 /* do nondiagonal part */ 426 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 427 /* send it on its way */ 428 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 429 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 430 /* do local part */ 431 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 432 /* receive remote parts: note this assumes the values are not actually */ 433 /* inserted in yy until the next line, which is true for my implementation*/ 434 /* but is not perhaps always true. */ 435 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 436 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 437 return 0; 438 } 439 440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 441 { 442 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 443 int ierr; 444 445 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 446 /* do nondiagonal part */ 447 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 448 /* send it on its way */ 449 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 450 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 451 /* do local part */ 452 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 453 /* receive remote parts: note this assumes the values are not actually */ 454 /* inserted in yy until the next line, which is true for my implementation*/ 455 /* but is not perhaps always true. */ 456 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 457 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 458 return 0; 459 } 460 461 /* 462 This only works correctly for square matrices where the subblock A->A is the 463 diagonal block 464 */ 465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 466 { 467 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 468 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 469 return MatGetDiagonal(a->A,v); 470 } 471 472 static int MatDestroy_MPIAIJ(PetscObject obj) 473 { 474 Mat mat = (Mat) obj; 475 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 476 int ierr; 477 #if defined(PETSC_LOG) 478 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 479 #endif 480 PETSCFREE(aij->rowners); 481 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 482 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 483 if (aij->colmap) PETSCFREE(aij->colmap); 484 if (aij->garray) PETSCFREE(aij->garray); 485 if (aij->lvec) VecDestroy(aij->lvec); 486 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 487 PETSCFREE(aij); 488 PLogObjectDestroy(mat); 489 PETSCHEADERDESTROY(mat); 490 return 0; 491 } 492 #include "draw.h" 493 #include "pinclude/pviewer.h" 494 495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 496 { 497 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 498 int ierr; 499 500 if (aij->size == 1) { 501 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 502 } 503 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 504 return 0; 505 } 506 507 static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer) 508 { 509 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 510 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 511 int ierr, format,shift = C->indexshift,rank; 512 PetscObject vobj = (PetscObject) viewer; 513 FILE *fd; 514 515 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 516 ierr = ViewerFileGetFormat_Private(viewer,&format); 517 if (format == FILE_FORMAT_INFO_DETAILED) { 518 int nz,nzalloc,mem; 519 MPI_Comm_rank(mat->comm,&rank); 520 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 521 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 522 MPIU_Seq_begin(mat->comm,1); 523 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 524 nzalloc,mem); 525 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 526 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 527 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 528 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 529 fflush(fd); 530 MPIU_Seq_end(mat->comm,1); 531 ierr = VecScatterView(aij->Mvctx,viewer); 532 return 0; 533 } 534 else if (format == FILE_FORMAT_INFO) { 535 return 0; 536 } 537 } 538 539 if (vobj->type == ASCII_FILE_VIEWER) { 540 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 541 MPIU_Seq_begin(mat->comm,1); 542 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 543 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 544 aij->cend); 545 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 546 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 547 fflush(fd); 548 MPIU_Seq_end(mat->comm,1); 549 } 550 else { 551 int size = aij->size; 552 rank = aij->rank; 553 if (size == 1) { 554 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 555 } 556 else { 557 /* assemble the entire matrix onto first processor. */ 558 Mat A; 559 Mat_SeqAIJ *Aloc; 560 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 561 Scalar *a; 562 563 if (!rank) { 564 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 565 } 566 else { 567 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 568 } 569 PLogObjectParent(mat,A); 570 571 572 /* copy over the A part */ 573 Aloc = (Mat_SeqAIJ*) aij->A->data; 574 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 575 row = aij->rstart; 576 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 577 for ( i=0; i<m; i++ ) { 578 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 579 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 580 } 581 aj = Aloc->j; 582 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 583 584 /* copy over the B part */ 585 Aloc = (Mat_SeqAIJ*) aij->B->data; 586 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 587 row = aij->rstart; 588 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 589 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 590 for ( i=0; i<m; i++ ) { 591 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 592 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 593 } 594 PETSCFREE(ct); 595 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 596 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 597 if (!rank) { 598 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 599 } 600 ierr = MatDestroy(A); CHKERRQ(ierr); 601 } 602 } 603 return 0; 604 } 605 606 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 607 { 608 Mat mat = (Mat) obj; 609 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 610 int ierr; 611 PetscObject vobj = (PetscObject) viewer; 612 613 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 614 if (!viewer) { 615 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 616 } 617 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 618 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 619 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 620 } 621 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 622 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 623 } 624 else if (vobj->cookie == DRAW_COOKIE) { 625 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 626 } 627 else if (vobj->type == BINARY_FILE_VIEWER) { 628 return MatView_MPIAIJ_Binary(mat,viewer); 629 } 630 return 0; 631 } 632 633 extern int MatMarkDiag_SeqAIJ(Mat); 634 /* 635 This has to provide several versions. 636 637 1) per sequential 638 2) a) use only local smoothing updating outer values only once. 639 b) local smoothing updating outer values each inner iteration 640 3) color updating out values betwen colors. 641 */ 642 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 643 double fshift,int its,Vec xx) 644 { 645 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 646 Mat AA = mat->A, BB = mat->B; 647 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 648 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 649 int ierr,*idx, *diag; 650 int n = mat->n, m = mat->m, i,shift = A->indexshift; 651 Vec tt; 652 653 if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 654 655 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 656 xs = x + shift; /* shift by one for index start of 1 */ 657 ls = ls + shift; 658 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 659 diag = A->diag; 660 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 661 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 662 } 663 if (flag & SOR_EISENSTAT) { 664 /* Let A = L + U + D; where L is lower trianglar, 665 U is upper triangular, E is diagonal; This routine applies 666 667 (L + E)^{-1} A (U + E)^{-1} 668 669 to a vector efficiently using Eisenstat's trick. This is for 670 the case of SSOR preconditioner, so E is D/omega where omega 671 is the relaxation factor. 672 */ 673 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 674 PLogObjectParent(matin,tt); 675 VecGetArray(tt,&t); 676 scale = (2.0/omega) - 1.0; 677 /* x = (E + U)^{-1} b */ 678 VecSet(&zero,mat->lvec); 679 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 680 mat->Mvctx); CHKERRQ(ierr); 681 for ( i=m-1; i>-1; i-- ) { 682 n = A->i[i+1] - diag[i] - 1; 683 idx = A->j + diag[i] + !shift; 684 v = A->a + diag[i] + !shift; 685 sum = b[i]; 686 SPARSEDENSEMDOT(sum,xs,v,idx,n); 687 d = fshift + A->a[diag[i]+shift]; 688 n = B->i[i+1] - B->i[i]; 689 idx = B->j + B->i[i] + shift; 690 v = B->a + B->i[i] + shift; 691 SPARSEDENSEMDOT(sum,ls,v,idx,n); 692 x[i] = omega*(sum/d); 693 } 694 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 695 mat->Mvctx); CHKERRQ(ierr); 696 697 /* t = b - (2*E - D)x */ 698 v = A->a; 699 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 700 701 /* t = (E + L)^{-1}t */ 702 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 703 diag = A->diag; 704 VecSet(&zero,mat->lvec); 705 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 706 mat->Mvctx); CHKERRQ(ierr); 707 for ( i=0; i<m; i++ ) { 708 n = diag[i] - A->i[i]; 709 idx = A->j + A->i[i] + shift; 710 v = A->a + A->i[i] + shift; 711 sum = t[i]; 712 SPARSEDENSEMDOT(sum,ts,v,idx,n); 713 d = fshift + A->a[diag[i]+shift]; 714 n = B->i[i+1] - B->i[i]; 715 idx = B->j + B->i[i] + shift; 716 v = B->a + B->i[i] + shift; 717 SPARSEDENSEMDOT(sum,ls,v,idx,n); 718 t[i] = omega*(sum/d); 719 } 720 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 721 mat->Mvctx); CHKERRQ(ierr); 722 /* x = x + t */ 723 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 724 VecDestroy(tt); 725 return 0; 726 } 727 728 729 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 730 if (flag & SOR_ZERO_INITIAL_GUESS) { 731 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 732 } 733 else { 734 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 735 CHKERRQ(ierr); 736 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 737 CHKERRQ(ierr); 738 } 739 while (its--) { 740 /* go down through the rows */ 741 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 742 mat->Mvctx); CHKERRQ(ierr); 743 for ( i=0; i<m; i++ ) { 744 n = A->i[i+1] - A->i[i]; 745 idx = A->j + A->i[i] + shift; 746 v = A->a + A->i[i] + shift; 747 sum = b[i]; 748 SPARSEDENSEMDOT(sum,xs,v,idx,n); 749 d = fshift + A->a[diag[i]+shift]; 750 n = B->i[i+1] - B->i[i]; 751 idx = B->j + B->i[i] + shift; 752 v = B->a + B->i[i] + shift; 753 SPARSEDENSEMDOT(sum,ls,v,idx,n); 754 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 755 } 756 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 757 mat->Mvctx); CHKERRQ(ierr); 758 /* come up through the rows */ 759 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 760 mat->Mvctx); CHKERRQ(ierr); 761 for ( i=m-1; i>-1; i-- ) { 762 n = A->i[i+1] - A->i[i]; 763 idx = A->j + A->i[i] + shift; 764 v = A->a + A->i[i] + shift; 765 sum = b[i]; 766 SPARSEDENSEMDOT(sum,xs,v,idx,n); 767 d = fshift + A->a[diag[i]+shift]; 768 n = B->i[i+1] - B->i[i]; 769 idx = B->j + B->i[i] + shift; 770 v = B->a + B->i[i] + shift; 771 SPARSEDENSEMDOT(sum,ls,v,idx,n); 772 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 773 } 774 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 775 mat->Mvctx); CHKERRQ(ierr); 776 } 777 } 778 else if (flag & SOR_FORWARD_SWEEP){ 779 if (flag & SOR_ZERO_INITIAL_GUESS) { 780 VecSet(&zero,mat->lvec); 781 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 782 mat->Mvctx); CHKERRQ(ierr); 783 for ( i=0; i<m; i++ ) { 784 n = diag[i] - A->i[i]; 785 idx = A->j + A->i[i] + shift; 786 v = A->a + A->i[i] + shift; 787 sum = b[i]; 788 SPARSEDENSEMDOT(sum,xs,v,idx,n); 789 d = fshift + A->a[diag[i]+shift]; 790 n = B->i[i+1] - B->i[i]; 791 idx = B->j + B->i[i] + shift; 792 v = B->a + B->i[i] + shift; 793 SPARSEDENSEMDOT(sum,ls,v,idx,n); 794 x[i] = omega*(sum/d); 795 } 796 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 797 mat->Mvctx); CHKERRQ(ierr); 798 its--; 799 } 800 while (its--) { 801 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 802 CHKERRQ(ierr); 803 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 804 CHKERRQ(ierr); 805 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 806 mat->Mvctx); CHKERRQ(ierr); 807 for ( i=0; i<m; i++ ) { 808 n = A->i[i+1] - A->i[i]; 809 idx = A->j + A->i[i] + shift; 810 v = A->a + A->i[i] + shift; 811 sum = b[i]; 812 SPARSEDENSEMDOT(sum,xs,v,idx,n); 813 d = fshift + A->a[diag[i]+shift]; 814 n = B->i[i+1] - B->i[i]; 815 idx = B->j + B->i[i] + shift; 816 v = B->a + B->i[i] + shift; 817 SPARSEDENSEMDOT(sum,ls,v,idx,n); 818 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819 } 820 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 821 mat->Mvctx); CHKERRQ(ierr); 822 } 823 } 824 else if (flag & SOR_BACKWARD_SWEEP){ 825 if (flag & SOR_ZERO_INITIAL_GUESS) { 826 VecSet(&zero,mat->lvec); 827 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 828 mat->Mvctx); CHKERRQ(ierr); 829 for ( i=m-1; i>-1; i-- ) { 830 n = A->i[i+1] - diag[i] - 1; 831 idx = A->j + diag[i] + !shift; 832 v = A->a + diag[i] + !shift; 833 sum = b[i]; 834 SPARSEDENSEMDOT(sum,xs,v,idx,n); 835 d = fshift + A->a[diag[i]+shift]; 836 n = B->i[i+1] - B->i[i]; 837 idx = B->j + B->i[i] + shift; 838 v = B->a + B->i[i] + shift; 839 SPARSEDENSEMDOT(sum,ls,v,idx,n); 840 x[i] = omega*(sum/d); 841 } 842 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 843 mat->Mvctx); CHKERRQ(ierr); 844 its--; 845 } 846 while (its--) { 847 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 848 mat->Mvctx); CHKERRQ(ierr); 849 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 850 mat->Mvctx); CHKERRQ(ierr); 851 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 852 mat->Mvctx); CHKERRQ(ierr); 853 for ( i=m-1; i>-1; i-- ) { 854 n = A->i[i+1] - A->i[i]; 855 idx = A->j + A->i[i] + shift; 856 v = A->a + A->i[i] + shift; 857 sum = b[i]; 858 SPARSEDENSEMDOT(sum,xs,v,idx,n); 859 d = fshift + A->a[diag[i]+shift]; 860 n = B->i[i+1] - B->i[i]; 861 idx = B->j + B->i[i] + shift; 862 v = B->a + B->i[i] + shift; 863 SPARSEDENSEMDOT(sum,ls,v,idx,n); 864 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 865 } 866 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 867 mat->Mvctx); CHKERRQ(ierr); 868 } 869 } 870 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 871 if (flag & SOR_ZERO_INITIAL_GUESS) { 872 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 873 } 874 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 875 CHKERRQ(ierr); 876 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 877 CHKERRQ(ierr); 878 while (its--) { 879 /* go down through the rows */ 880 for ( i=0; i<m; i++ ) { 881 n = A->i[i+1] - A->i[i]; 882 idx = A->j + A->i[i] + shift; 883 v = A->a + A->i[i] + shift; 884 sum = b[i]; 885 SPARSEDENSEMDOT(sum,xs,v,idx,n); 886 d = fshift + A->a[diag[i]+shift]; 887 n = B->i[i+1] - B->i[i]; 888 idx = B->j + B->i[i] + shift; 889 v = B->a + B->i[i] + shift; 890 SPARSEDENSEMDOT(sum,ls,v,idx,n); 891 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 892 } 893 /* come up through the rows */ 894 for ( i=m-1; i>-1; i-- ) { 895 n = A->i[i+1] - A->i[i]; 896 idx = A->j + A->i[i] + shift; 897 v = A->a + A->i[i] + shift; 898 sum = b[i]; 899 SPARSEDENSEMDOT(sum,xs,v,idx,n); 900 d = fshift + A->a[diag[i]+shift]; 901 n = B->i[i+1] - B->i[i]; 902 idx = B->j + B->i[i] + shift; 903 v = B->a + B->i[i] + shift; 904 SPARSEDENSEMDOT(sum,ls,v,idx,n); 905 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 906 } 907 } 908 } 909 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 910 if (flag & SOR_ZERO_INITIAL_GUESS) { 911 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 912 } 913 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 914 CHKERRQ(ierr); 915 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 916 CHKERRQ(ierr); 917 while (its--) { 918 for ( i=0; i<m; i++ ) { 919 n = A->i[i+1] - A->i[i]; 920 idx = A->j + A->i[i] + shift; 921 v = A->a + A->i[i] + shift; 922 sum = b[i]; 923 SPARSEDENSEMDOT(sum,xs,v,idx,n); 924 d = fshift + A->a[diag[i]+shift]; 925 n = B->i[i+1] - B->i[i]; 926 idx = B->j + B->i[i] + shift; 927 v = B->a + B->i[i] + shift; 928 SPARSEDENSEMDOT(sum,ls,v,idx,n); 929 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 930 } 931 } 932 } 933 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 934 if (flag & SOR_ZERO_INITIAL_GUESS) { 935 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 936 } 937 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 938 mat->Mvctx); CHKERRQ(ierr); 939 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 940 mat->Mvctx); CHKERRQ(ierr); 941 while (its--) { 942 for ( i=m-1; i>-1; i-- ) { 943 n = A->i[i+1] - A->i[i]; 944 idx = A->j + A->i[i] + shift; 945 v = A->a + A->i[i] + shift; 946 sum = b[i]; 947 SPARSEDENSEMDOT(sum,xs,v,idx,n); 948 d = fshift + A->a[diag[i]+shift]; 949 n = B->i[i+1] - B->i[i]; 950 idx = B->j + B->i[i] + shift; 951 v = B->a + B->i[i] + shift; 952 SPARSEDENSEMDOT(sum,ls,v,idx,n); 953 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 954 } 955 } 956 } 957 return 0; 958 } 959 960 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 961 int *nzalloc,int *mem) 962 { 963 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 964 Mat A = mat->A, B = mat->B; 965 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 966 967 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 968 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 969 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 970 if (flag == MAT_LOCAL) { 971 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 972 } else if (flag == MAT_GLOBAL_MAX) { 973 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 974 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 975 } else if (flag == MAT_GLOBAL_SUM) { 976 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 977 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 978 } 979 return 0; 980 } 981 982 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 983 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 984 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 985 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 986 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 987 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 988 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 989 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 990 991 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 992 { 993 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 994 995 if (op == NO_NEW_NONZERO_LOCATIONS || 996 op == YES_NEW_NONZERO_LOCATIONS || 997 op == COLUMNS_SORTED || 998 op == ROW_ORIENTED) { 999 MatSetOption(a->A,op); 1000 MatSetOption(a->B,op); 1001 } 1002 else if (op == ROWS_SORTED || 1003 op == SYMMETRIC_MATRIX || 1004 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1005 op == YES_NEW_DIAGONALS) 1006 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1007 else if (op == COLUMN_ORIENTED) 1008 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 1009 else if (op == NO_NEW_DIAGONALS) 1010 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1011 else 1012 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1013 return 0; 1014 } 1015 1016 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1019 *m = mat->M; *n = mat->N; 1020 return 0; 1021 } 1022 1023 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1024 { 1025 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1026 *m = mat->m; *n = mat->N; 1027 return 0; 1028 } 1029 1030 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1031 { 1032 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1033 *m = mat->rstart; *n = mat->rend; 1034 return 0; 1035 } 1036 1037 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1038 { 1039 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1040 Scalar *vworkA, *vworkB, **pvA, **pvB; 1041 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1042 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1043 1044 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows") 1045 lrow = row - rstart; 1046 1047 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1048 if (!v) {pvA = 0; pvB = 0;} 1049 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1050 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1051 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1052 nztot = nzA + nzB; 1053 1054 if (v || idx) { 1055 if (nztot) { 1056 /* Sort by increasing column numbers, assuming A and B already sorted */ 1057 int imark; 1058 if (mat->assembled) { 1059 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1060 } 1061 if (v) { 1062 *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1063 for ( i=0; i<nzB; i++ ) { 1064 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1065 else break; 1066 } 1067 imark = i; 1068 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1069 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1070 } 1071 if (idx) { 1072 *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1073 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1074 for ( i=0; i<nzB; i++ ) { 1075 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1076 else break; 1077 } 1078 imark = i; 1079 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1080 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1081 } 1082 } 1083 else {*idx = 0; *v=0;} 1084 } 1085 *nz = nztot; 1086 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1087 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1088 return 0; 1089 } 1090 1091 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1092 { 1093 if (idx) PETSCFREE(*idx); 1094 if (v) PETSCFREE(*v); 1095 return 0; 1096 } 1097 1098 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1099 { 1100 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1101 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1102 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1103 double sum = 0.0; 1104 Scalar *v; 1105 1106 if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1107 if (aij->size == 1) { 1108 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1109 } else { 1110 if (type == NORM_FROBENIUS) { 1111 v = amat->a; 1112 for (i=0; i<amat->nz; i++ ) { 1113 #if defined(PETSC_COMPLEX) 1114 sum += real(conj(*v)*(*v)); v++; 1115 #else 1116 sum += (*v)*(*v); v++; 1117 #endif 1118 } 1119 v = bmat->a; 1120 for (i=0; i<bmat->nz; i++ ) { 1121 #if defined(PETSC_COMPLEX) 1122 sum += real(conj(*v)*(*v)); v++; 1123 #else 1124 sum += (*v)*(*v); v++; 1125 #endif 1126 } 1127 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1128 *norm = sqrt(*norm); 1129 } 1130 else if (type == NORM_1) { /* max column norm */ 1131 double *tmp, *tmp2; 1132 int *jj, *garray = aij->garray; 1133 tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1134 tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1135 PetscZero(tmp,aij->N*sizeof(double)); 1136 *norm = 0.0; 1137 v = amat->a; jj = amat->j; 1138 for ( j=0; j<amat->nz; j++ ) { 1139 #if defined(PETSC_COMPLEX) 1140 tmp[cstart + *jj++ + shift] += abs(*v++); 1141 #else 1142 tmp[cstart + *jj++ + shift] += fabs(*v++); 1143 #endif 1144 } 1145 v = bmat->a; jj = bmat->j; 1146 for ( j=0; j<bmat->nz; j++ ) { 1147 #if defined(PETSC_COMPLEX) 1148 tmp[garray[*jj++ + shift]] += abs(*v++); 1149 #else 1150 tmp[garray[*jj++ + shift]] += fabs(*v++); 1151 #endif 1152 } 1153 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1154 for ( j=0; j<aij->N; j++ ) { 1155 if (tmp2[j] > *norm) *norm = tmp2[j]; 1156 } 1157 PETSCFREE(tmp); PETSCFREE(tmp2); 1158 } 1159 else if (type == NORM_INFINITY) { /* max row norm */ 1160 double ntemp = 0.0; 1161 for ( j=0; j<amat->m; j++ ) { 1162 v = amat->a + amat->i[j] + shift; 1163 sum = 0.0; 1164 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1165 #if defined(PETSC_COMPLEX) 1166 sum += abs(*v); v++; 1167 #else 1168 sum += fabs(*v); v++; 1169 #endif 1170 } 1171 v = bmat->a + bmat->i[j] + shift; 1172 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1173 #if defined(PETSC_COMPLEX) 1174 sum += abs(*v); v++; 1175 #else 1176 sum += fabs(*v); v++; 1177 #endif 1178 } 1179 if (sum > ntemp) ntemp = sum; 1180 } 1181 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1182 } 1183 else { 1184 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1185 } 1186 } 1187 return 0; 1188 } 1189 1190 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1191 { 1192 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1193 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1194 int ierr,shift = Aloc->indexshift; 1195 Mat B; 1196 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1197 Scalar *array; 1198 1199 if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1200 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1201 CHKERRQ(ierr); 1202 1203 /* copy over the A part */ 1204 Aloc = (Mat_SeqAIJ*) a->A->data; 1205 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1206 row = a->rstart; 1207 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1208 for ( i=0; i<m; i++ ) { 1209 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1210 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1211 } 1212 aj = Aloc->j; 1213 for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1214 1215 /* copy over the B part */ 1216 Aloc = (Mat_SeqAIJ*) a->B->data; 1217 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1218 row = a->rstart; 1219 ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1220 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1221 for ( i=0; i<m; i++ ) { 1222 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1223 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1224 } 1225 PETSCFREE(ct); 1226 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1227 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1228 if (matout) { 1229 *matout = B; 1230 } else { 1231 /* This isn't really an in-place transpose .... but free data structures from a */ 1232 PETSCFREE(a->rowners); 1233 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1234 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1235 if (a->colmap) PETSCFREE(a->colmap); 1236 if (a->garray) PETSCFREE(a->garray); 1237 if (a->lvec) VecDestroy(a->lvec); 1238 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1239 PETSCFREE(a); 1240 PetscMemcpy(A,B,sizeof(struct _Mat)); 1241 PETSCHEADERDESTROY(B); 1242 } 1243 return 0; 1244 } 1245 1246 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1247 static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int); 1248 1249 /* -------------------------------------------------------------------*/ 1250 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1251 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1252 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1253 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1254 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1255 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1256 MatLUFactor_MPIAIJ,0, 1257 MatRelax_MPIAIJ, 1258 MatTranspose_MPIAIJ, 1259 MatGetInfo_MPIAIJ,0, 1260 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1261 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1262 0, 1263 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1264 MatGetReordering_MPIAIJ, 1265 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1266 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1267 MatILUFactorSymbolic_MPIAIJ,0, 1268 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 1269 1270 /*@C 1271 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1272 (the default parallel PETSc format). 1273 1274 Input Parameters: 1275 . comm - MPI communicator 1276 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1277 . n - number of local columns (or PETSC_DECIDE to have calculated 1278 if N is given) 1279 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1280 . N - number of global columns (or PETSC_DECIDE to have calculated 1281 if n is given) 1282 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1283 (same for all local rows) 1284 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1285 or null (possibly different for each row). You must leave room 1286 for the diagonal entry even if it is zero. 1287 . o_nz - number of nonzeros per row in off-diagonal portion of local 1288 submatrix (same for all local rows). 1289 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1290 submatrix or null (possibly different for each row). 1291 1292 Output Parameter: 1293 . newmat - the matrix 1294 1295 Notes: 1296 The AIJ format (also called the Yale sparse matrix format or 1297 compressed row storage), is fully compatible with standard Fortran 77 1298 storage. That is, the stored row and column indices can begin at 1299 either one (as in Fortran) or zero. See the users manual for details. 1300 1301 The user MUST specify either the local or global matrix dimensions 1302 (possibly both). 1303 1304 Storage Information: 1305 For a square global matrix we define each processor's diagonal portion 1306 to be its local rows and the corresponding columns (a square submatrix); 1307 each processor's off-diagonal portion encompasses the remainder of the 1308 local matrix (a rectangular submatrix). 1309 1310 The user can specify preallocated storage for the diagonal part of 1311 the local submatrix with either d_nz or d_nnz (not both). Set both 1312 d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1313 Likewise, specify preallocated storage for the off-diagonal part of 1314 the local submatrix with o_nz or o_nnz (not both). 1315 1316 .keywords: matrix, aij, compressed row, sparse, parallel 1317 1318 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1319 @*/ 1320 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1321 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1322 { 1323 Mat mat; 1324 Mat_MPIAIJ *a; 1325 int ierr, i,sum[2],work[2]; 1326 1327 *newmat = 0; 1328 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1329 PLogObjectCreate(mat); 1330 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1331 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1332 mat->destroy = MatDestroy_MPIAIJ; 1333 mat->view = MatView_MPIAIJ; 1334 mat->factor = 0; 1335 1336 a->insertmode = NOT_SET_VALUES; 1337 MPI_Comm_rank(comm,&a->rank); 1338 MPI_Comm_size(comm,&a->size); 1339 1340 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 1341 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1342 1343 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1344 work[0] = m; work[1] = n; 1345 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1346 if (M == PETSC_DECIDE) M = sum[0]; 1347 if (N == PETSC_DECIDE) N = sum[1]; 1348 } 1349 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1350 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1351 a->m = m; 1352 a->n = n; 1353 a->N = N; 1354 a->M = M; 1355 1356 /* build local table of row and column ownerships */ 1357 a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1358 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 1359 sizeof(Mat_MPIAIJ)); 1360 a->cowners = a->rowners + a->size + 1; 1361 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1362 a->rowners[0] = 0; 1363 for ( i=2; i<=a->size; i++ ) { 1364 a->rowners[i] += a->rowners[i-1]; 1365 } 1366 a->rstart = a->rowners[a->rank]; 1367 a->rend = a->rowners[a->rank+1]; 1368 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1369 a->cowners[0] = 0; 1370 for ( i=2; i<=a->size; i++ ) { 1371 a->cowners[i] += a->cowners[i-1]; 1372 } 1373 a->cstart = a->cowners[a->rank]; 1374 a->cend = a->cowners[a->rank+1]; 1375 1376 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1377 PLogObjectParent(mat,a->A); 1378 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1379 PLogObjectParent(mat,a->B); 1380 1381 /* build cache for off array entries formed */ 1382 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1383 a->colmap = 0; 1384 a->garray = 0; 1385 1386 /* stuff used for matrix vector multiply */ 1387 a->lvec = 0; 1388 a->Mvctx = 0; 1389 a->assembled = 0; 1390 1391 *newmat = mat; 1392 return 0; 1393 } 1394 1395 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1396 { 1397 Mat mat; 1398 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1399 int ierr, len; 1400 1401 if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1402 *newmat = 0; 1403 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1404 PLogObjectCreate(mat); 1405 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1406 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1407 mat->destroy = MatDestroy_MPIAIJ; 1408 mat->view = MatView_MPIAIJ; 1409 mat->factor = matin->factor; 1410 1411 a->m = oldmat->m; 1412 a->n = oldmat->n; 1413 a->M = oldmat->M; 1414 a->N = oldmat->N; 1415 1416 a->assembled = 1; 1417 a->rstart = oldmat->rstart; 1418 a->rend = oldmat->rend; 1419 a->cstart = oldmat->cstart; 1420 a->cend = oldmat->cend; 1421 a->size = oldmat->size; 1422 a->rank = oldmat->rank; 1423 a->insertmode = NOT_SET_VALUES; 1424 1425 a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1426 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1427 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1428 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1429 if (oldmat->colmap) { 1430 a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1431 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1432 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1433 } else a->colmap = 0; 1434 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1435 a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray); 1436 PLogObjectMemory(mat,len*sizeof(int)); 1437 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1438 } else a->garray = 0; 1439 1440 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1441 PLogObjectParent(mat,a->lvec); 1442 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1443 PLogObjectParent(mat,a->Mvctx); 1444 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1445 PLogObjectParent(mat,a->A); 1446 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1447 PLogObjectParent(mat,a->B); 1448 *newmat = mat; 1449 return 0; 1450 } 1451 1452 #include "sysio.h" 1453 1454 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1455 { 1456 Mat A; 1457 int i, nz, ierr, j,rstart, rend, fd; 1458 Scalar *vals,*svals; 1459 PetscObject vobj = (PetscObject) bview; 1460 MPI_Comm comm = vobj->comm; 1461 MPI_Status status; 1462 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1463 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1464 int tag = ((PetscObject)bview)->tag; 1465 1466 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1467 if (!rank) { 1468 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1469 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1470 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1471 } 1472 1473 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1474 M = header[1]; N = header[2]; 1475 /* determine ownership of all rows */ 1476 m = M/size + ((M % size) > rank); 1477 rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners); 1478 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1479 rowners[0] = 0; 1480 for ( i=2; i<=size; i++ ) { 1481 rowners[i] += rowners[i-1]; 1482 } 1483 rstart = rowners[rank]; 1484 rend = rowners[rank+1]; 1485 1486 /* distribute row lengths to all processors */ 1487 ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1488 offlens = ourlens + (rend-rstart); 1489 if (!rank) { 1490 rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths); 1491 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1492 sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts); 1493 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1494 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1495 PETSCFREE(sndcounts); 1496 } 1497 else { 1498 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1499 } 1500 1501 if (!rank) { 1502 /* calculate the number of nonzeros on each processor */ 1503 procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz); 1504 PetscZero(procsnz,size*sizeof(int)); 1505 for ( i=0; i<size; i++ ) { 1506 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1507 procsnz[i] += rowlengths[j]; 1508 } 1509 } 1510 PETSCFREE(rowlengths); 1511 1512 /* determine max buffer needed and allocate it */ 1513 maxnz = 0; 1514 for ( i=0; i<size; i++ ) { 1515 maxnz = PETSCMAX(maxnz,procsnz[i]); 1516 } 1517 cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols); 1518 1519 /* read in my part of the matrix column indices */ 1520 nz = procsnz[0]; 1521 mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1522 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1523 1524 /* read in every one elses and ship off */ 1525 for ( i=1; i<size; i++ ) { 1526 nz = procsnz[i]; 1527 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1528 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1529 } 1530 PETSCFREE(cols); 1531 } 1532 else { 1533 /* determine buffer space needed for message */ 1534 nz = 0; 1535 for ( i=0; i<m; i++ ) { 1536 nz += ourlens[i]; 1537 } 1538 mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1539 1540 /* receive message of column indices*/ 1541 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1542 MPI_Get_count(&status,MPI_INT,&maxnz); 1543 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1544 } 1545 1546 /* loop over local rows, determining number of off diagonal entries */ 1547 PetscZero(offlens,m*sizeof(int)); 1548 jj = 0; 1549 for ( i=0; i<m; i++ ) { 1550 for ( j=0; j<ourlens[i]; j++ ) { 1551 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1552 jj++; 1553 } 1554 } 1555 1556 /* create our matrix */ 1557 for ( i=0; i<m; i++ ) { 1558 ourlens[i] -= offlens[i]; 1559 } 1560 if (type == MATMPIAIJ) { 1561 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1562 } 1563 else if (type == MATMPIROW) { 1564 ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1565 } 1566 A = *newmat; 1567 MatSetOption(A,COLUMNS_SORTED); 1568 for ( i=0; i<m; i++ ) { 1569 ourlens[i] += offlens[i]; 1570 } 1571 1572 if (!rank) { 1573 vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1574 1575 /* read in my part of the matrix numerical values */ 1576 nz = procsnz[0]; 1577 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1578 1579 /* insert into matrix */ 1580 jj = rstart; 1581 smycols = mycols; 1582 svals = vals; 1583 for ( i=0; i<m; i++ ) { 1584 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1585 smycols += ourlens[i]; 1586 svals += ourlens[i]; 1587 jj++; 1588 } 1589 1590 /* read in other processors and ship out */ 1591 for ( i=1; i<size; i++ ) { 1592 nz = procsnz[i]; 1593 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1594 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1595 } 1596 PETSCFREE(procsnz); 1597 } 1598 else { 1599 /* receive numeric values */ 1600 vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1601 1602 /* receive message of values*/ 1603 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1604 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1605 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1606 1607 /* insert into matrix */ 1608 jj = rstart; 1609 smycols = mycols; 1610 svals = vals; 1611 for ( i=0; i<m; i++ ) { 1612 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1613 smycols += ourlens[i]; 1614 svals += ourlens[i]; 1615 jj++; 1616 } 1617 } 1618 PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners); 1619 1620 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1621 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1622 return 0; 1623 } 1624