#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.100 1995/11/25 23:49:23 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global number of columns.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int)); CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i - shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv); CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); we store the global
          indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
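     One possible improvement (a suggestion only, not implemented here) would
     be to communicate the exact length of each incoming message in advance,
     so that each receive buffer could be sized individually rather than
     using the worst-case length nmax for every receive.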
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar)); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]   = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled,shift = C->indexshift;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); /* see note */
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
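  /* starts[] first serves as a running offset while packing svalues (each
     entry is incremented as a row index is copied in) and is then rebuilt
     below before the sends are posted */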
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*) aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
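      /* the MPIU_Seq_begin/MPIU_Seq_end pair serializes this printing block
         across the processes so that each prints its information in turn */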
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PetscNull,0,PetscNull,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PetscNull,0,PetscNull,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES); CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES); CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
   This has to provide several versions:
     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *) BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is strictly lower triangular,
       U is strictly upper triangular, and D is diagonal. This routine applies

               (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of the SSOR preconditioner, so E is D/omega, where omega
       is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n   = A->i[i+1] - diag[i] - 1;
      idx = A->j + diag[i] + !shift;
      v   = A->a + diag[i] + !shift;
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d   = fshift + A->a[diag[i]+shift];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] + shift;
      v   = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shift by one for index start of 1 */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n   = diag[i] - A->i[i];
      idx = A->j + A->i[i] + shift;
      v   = A->a + A->i[i] + shift;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d   = fshift + A->a[diag[i]+shift];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] + shift;
      v   = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = diag[i] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - diag[i] - 1;
        idx = A->j + diag[i] + !shift;
        v   = A->a + diag[i] + !shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v = 0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PetscNull,0,
                         PetscNull,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ,0,0,
       0,0,0,
       0,0,MatGetValues_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
   (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
   or null (possibly different for each row). You must leave room
   for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
   submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
   submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage. That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero. See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
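
   For example (an illustration only), consider an 8x8 global matrix
   distributed over two processors, each owning four rows: process 0's
   diagonal portion is the 4x4 submatrix formed by its four rows and the
   first four global columns, while its off-diagonal portion is the 4x4
   submatrix formed by those rows and the remaining four columns; process
   1's portions are defined analogously from the last four rows.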

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set d_nz=0
   and d_nnz=PetscNull for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz != PetscNull || o_nnz != PetscNull))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap = 0;
  a->garray = 0;

  /* stuff used for matrix-vector multiply */
  a->lvec  = 0;
  a->Mvctx = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  *newmat = mat;
  return 0;
}

#include "sysio.h"

int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat         A;
  int         i, nz, ierr, j,rstart, rend, fd;
  Scalar      *vals,*svals;
  PetscObject vobj = (PetscObject) bview;
  MPI_Comm    comm = vobj->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
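
/*
   A minimal usage sketch (illustrative only; error checking is omitted, and
   the global sizes M and N, the row length nc, and the preallocation figures
   are placeholders): create a parallel AIJ matrix, set the locally owned
   rows, and assemble it before calling MatMult() or the other operations
   defined in this file.

       Mat    mat;
       int    ierr,rstart,rend,i,nc,cols[3];
       Scalar vals[3];
       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                              3,PetscNull,2,PetscNull,&mat);
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);
       for ( i=rstart; i<rend; i++ ) {
         ... fill cols[] and vals[] with the nc entries of global row i ...
         ierr = MatSetValues(mat,1,&i,nc,cols,vals,INSERT_VALUES);
       }
       ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY);
       ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY);
*/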