1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.113 1996/01/23 00:44:13 balay Exp balay $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (aij->assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix"); 104 for ( i=0; i<m; i++ ) { 105 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 106 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 107 if (idxm[i] >= rstart && idxm[i] < rend) { 108 row = idxm[i] - rstart; 109 for ( j=0; j<n; j++ ) { 110 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 111 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 112 if (idxn[j] >= cstart && idxn[j] < cend){ 113 col = idxn[j] - cstart; 114 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 115 } 116 else { 117 col = aij->colmap[idxn[j]] + shift; 118 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 119 } 120 } 121 } 122 else { 123 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 124 } 125 } 126 return 0; 127 } 128 129 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 130 { 131 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 132 MPI_Comm comm = mat->comm; 133 int size = aij->size, *owners = aij->rowners; 134 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 135 MPI_Request *send_waits,*recv_waits; 136 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 137 InsertMode addv; 138 Scalar *rvalues,*svalues; 139 140 /* make sure all processors are either in INSERTMODE or ADDMODE */ 141 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 142 if (addv == (ADD_VALUES|INSERT_VALUES)) { 143 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 144 } 145 aij->insertmode = addv; /* in case this processor had no cache */ 146 147 /* first count number of contributors to each processor */ 148 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 149 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 150 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 151 for ( i=0; i<aij->stash.n; i++ ) { 152 idx = aij->stash.idx[i]; 153 for ( j=0; j<size; j++ ) { 154 if (idx >= owners[j] && idx < owners[j+1]) { 155 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 156 } 157 } 158 } 159 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 160 161 /* inform other processors of number of messages and max length*/ 162 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 163 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 164 nreceives = work[rank]; 165 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 166 nmax = work[rank]; 167 PetscFree(work); 168 169 /* post receives: 170 1) each message will consist of ordered pairs 171 (global index,value) we store the global index as a double 172 to simplify the message passing. 173 2) since we don't know how long each individual message is we 174 allocate the largest needed buffer for each receive. Potentially 175 this is a lot of wasted space. 176 177 178 This could be done better. 179 */ 180 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 181 CHKPTRQ(rvalues); 182 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 183 CHKPTRQ(recv_waits); 184 for ( i=0; i<nreceives; i++ ) { 185 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 186 comm,recv_waits+i); 187 } 188 189 /* do sends: 190 1) starts[i] gives the starting index in svalues for stuff going to 191 the ith processor 192 */ 193 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 194 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 195 CHKPTRQ(send_waits); 196 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 197 starts[0] = 0; 198 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 199 for ( i=0; i<aij->stash.n; i++ ) { 200 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 201 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 202 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 203 } 204 PetscFree(owner); 205 starts[0] = 0; 206 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 207 count = 0; 208 for ( i=0; i<size; i++ ) { 209 if (procs[i]) { 210 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 211 comm,send_waits+count++); 212 } 213 } 214 PetscFree(starts); PetscFree(nprocs); 215 216 /* Free cache space */ 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 247 col = (int) PETSCREAL(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (aij->assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PETSCREAL(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (aij->assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!aij->assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 aij->assembled = 1; 292 } 293 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 294 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 295 296 return 0; 297 } 298 299 static int MatZeroEntries_MPIAIJ(Mat A) 300 { 301 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 302 int ierr; 303 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 304 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 305 return 0; 306 } 307 308 /* the code does not do the diagonal entries correctly unless the 309 matrix is square and the column and row owerships are identical. 310 This is a BUG. The only way to fix it seems to be to access 311 aij->A and aij->B directly and not through the MatZeroRows() 312 routine. 313 */ 314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 315 { 316 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 317 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 318 int *procs,*nprocs,j,found,idx,nsends,*work; 319 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 320 int *rvalues,tag = A->tag,count,base,slen,n,*source; 321 int *lens,imdex,*lrows,*values; 322 MPI_Comm comm = A->comm; 323 MPI_Request *send_waits,*recv_waits; 324 MPI_Status recv_status,*send_status; 325 IS istmp; 326 327 if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 328 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 329 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 330 331 /* first count number of contributors to each processor */ 332 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 333 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 334 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 335 for ( i=0; i<N; i++ ) { 336 idx = rows[i]; 337 found = 0; 338 for ( j=0; j<size; j++ ) { 339 if (idx >= owners[j] && idx < owners[j+1]) { 340 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 341 } 342 } 343 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 344 } 345 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 346 347 /* inform other processors of number of messages and max length*/ 348 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 349 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 350 nrecvs = work[rank]; 351 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 352 nmax = work[rank]; 353 PetscFree(work); 354 355 /* post receives: */ 356 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 357 CHKPTRQ(rvalues); 358 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 359 CHKPTRQ(recv_waits); 360 for ( i=0; i<nrecvs; i++ ) { 361 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 362 } 363 364 /* do sends: 365 1) starts[i] gives the starting index in svalues for stuff going to 366 the ith processor 367 */ 368 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 369 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 370 CHKPTRQ(send_waits); 371 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 372 starts[0] = 0; 373 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 374 for ( i=0; i<N; i++ ) { 375 svalues[starts[owner[i]]++] = rows[i]; 376 } 377 ISRestoreIndices(is,&rows); 378 379 starts[0] = 0; 380 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 381 count = 0; 382 for ( i=0; i<size; i++ ) { 383 if (procs[i]) { 384 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 385 } 386 } 387 PetscFree(starts); 388 389 base = owners[rank]; 390 391 /* wait on receives */ 392 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 393 source = lens + nrecvs; 394 count = nrecvs; slen = 0; 395 while (count) { 396 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 397 /* unpack receives into our local space */ 398 MPI_Get_count(&recv_status,MPI_INT,&n); 399 source[imdex] = recv_status.MPI_SOURCE; 400 lens[imdex] = n; 401 slen += n; 402 count--; 403 } 404 PetscFree(recv_waits); 405 406 /* move the data into the send scatter */ 407 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 408 count = 0; 409 for ( i=0; i<nrecvs; i++ ) { 410 values = rvalues + i*nmax; 411 for ( j=0; j<lens[i]; j++ ) { 412 lrows[count++] = values[j] - base; 413 } 414 } 415 PetscFree(rvalues); PetscFree(lens); 416 PetscFree(owner); PetscFree(nprocs); 417 418 /* actually zap the local rows */ 419 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 420 PLogObjectParent(A,istmp); 421 PetscFree(lrows); 422 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 423 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 424 ierr = ISDestroy(istmp); CHKERRQ(ierr); 425 426 /* wait on sends */ 427 if (nsends) { 428 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 429 CHKPTRQ(send_status); 430 MPI_Waitall(nsends,send_waits,send_status); 431 PetscFree(send_status); 432 } 433 PetscFree(send_waits); PetscFree(svalues); 434 435 return 0; 436 } 437 438 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 439 { 440 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 441 int ierr; 442 443 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 444 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 445 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 446 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 447 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 448 return 0; 449 } 450 451 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 452 { 453 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 454 int ierr; 455 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 456 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 457 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 458 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 459 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 460 return 0; 461 } 462 463 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 464 { 465 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 466 int ierr; 467 468 if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 469 /* do nondiagonal part */ 470 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 471 /* send it on its way */ 472 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 473 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 474 /* do local part */ 475 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 476 /* receive remote parts: note this assumes the values are not actually */ 477 /* inserted in yy until the next line, which is true for my implementation*/ 478 /* but is not perhaps always true. */ 479 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 480 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 481 return 0; 482 } 483 484 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 487 int ierr; 488 489 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 490 /* do nondiagonal part */ 491 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 492 /* send it on its way */ 493 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 494 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 495 /* do local part */ 496 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 497 /* receive remote parts: note this assumes the values are not actually */ 498 /* inserted in yy until the next line, which is true for my implementation*/ 499 /* but is not perhaps always true. */ 500 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 501 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 502 return 0; 503 } 504 505 /* 506 This only works correctly for square matrices where the subblock A->A is the 507 diagonal block 508 */ 509 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 510 { 511 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 513 return MatGetDiagonal(a->A,v); 514 } 515 516 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 517 { 518 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 519 int ierr; 520 if (!a->assembled) SETERRQ(1,"MatScale_MPIAIJ:must assemble matrix"); 521 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 522 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 523 return 0; 524 } 525 526 static int MatDestroy_MPIAIJ(PetscObject obj) 527 { 528 Mat mat = (Mat) obj; 529 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 530 int ierr; 531 #if defined(PETSC_LOG) 532 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 533 #endif 534 PetscFree(aij->rowners); 535 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 536 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 537 if (aij->colmap) PetscFree(aij->colmap); 538 if (aij->garray) PetscFree(aij->garray); 539 if (aij->lvec) VecDestroy(aij->lvec); 540 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 541 PetscFree(aij); 542 PLogObjectDestroy(mat); 543 PetscHeaderDestroy(mat); 544 return 0; 545 } 546 #include "draw.h" 547 #include "pinclude/pviewer.h" 548 549 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 550 { 551 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 552 int ierr; 553 554 if (aij->size == 1) { 555 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 556 } 557 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 558 return 0; 559 } 560 561 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 562 { 563 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 564 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 565 int ierr, format,shift = C->indexshift,rank; 566 PetscObject vobj = (PetscObject) viewer; 567 FILE *fd; 568 569 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 570 ierr = ViewerFileGetFormat_Private(viewer,&format); 571 if (format == FILE_FORMAT_INFO_DETAILED) { 572 int nz,nzalloc,mem; 573 MPI_Comm_rank(mat->comm,&rank); 574 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 575 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 576 MPIU_Seq_begin(mat->comm,1); 577 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 578 nzalloc,mem); 579 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 580 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 581 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 582 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 583 fflush(fd); 584 MPIU_Seq_end(mat->comm,1); 585 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 586 return 0; 587 } 588 else if (format == FILE_FORMAT_INFO) { 589 return 0; 590 } 591 } 592 593 if (vobj->type == ASCII_FILE_VIEWER) { 594 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 595 MPIU_Seq_begin(mat->comm,1); 596 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 597 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 598 aij->cend); 599 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 600 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 601 fflush(fd); 602 MPIU_Seq_end(mat->comm,1); 603 } 604 else { 605 int size = aij->size; 606 rank = aij->rank; 607 if (size == 1) { 608 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 609 } 610 else { 611 /* assemble the entire matrix onto first processor. */ 612 Mat A; 613 Mat_SeqAIJ *Aloc; 614 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 615 Scalar *a; 616 617 if (!rank) { 618 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 619 CHKERRQ(ierr); 620 } 621 else { 622 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 623 CHKERRQ(ierr); 624 } 625 PLogObjectParent(mat,A); 626 627 /* copy over the A part */ 628 Aloc = (Mat_SeqAIJ*) aij->A->data; 629 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 630 row = aij->rstart; 631 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 632 for ( i=0; i<m; i++ ) { 633 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 634 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 635 } 636 aj = Aloc->j; 637 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 638 639 /* copy over the B part */ 640 Aloc = (Mat_SeqAIJ*) aij->B->data; 641 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 642 row = aij->rstart; 643 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 644 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 645 for ( i=0; i<m; i++ ) { 646 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 647 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 648 } 649 PetscFree(ct); 650 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 651 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 652 if (!rank) { 653 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 654 } 655 ierr = MatDestroy(A); CHKERRQ(ierr); 656 } 657 } 658 return 0; 659 } 660 661 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 662 { 663 Mat mat = (Mat) obj; 664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 665 int ierr; 666 PetscObject vobj = (PetscObject) viewer; 667 668 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 669 if (!viewer) { 670 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 671 } 672 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 673 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 674 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 675 } 676 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 677 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 678 } 679 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 680 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 681 } 682 else if (vobj->cookie == DRAW_COOKIE) { 683 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 684 } 685 else if (vobj->type == BINARY_FILE_VIEWER) { 686 return MatView_MPIAIJ_Binary(mat,viewer); 687 } 688 return 0; 689 } 690 691 extern int MatMarkDiag_SeqAIJ(Mat); 692 /* 693 This has to provide several versions. 694 695 1) per sequential 696 2) a) use only local smoothing updating outer values only once. 697 b) local smoothing updating outer values each inner iteration 698 3) color updating out values betwen colors. 699 */ 700 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 701 double fshift,int its,Vec xx) 702 { 703 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 704 Mat AA = mat->A, BB = mat->B; 705 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 706 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 707 int ierr,*idx, *diag; 708 int n = mat->n, m = mat->m, i,shift = A->indexshift; 709 Vec tt; 710 711 if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 712 713 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 714 xs = x + shift; /* shift by one for index start of 1 */ 715 ls = ls + shift; 716 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 717 diag = A->diag; 718 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 719 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 720 } 721 if (flag & SOR_EISENSTAT) { 722 /* Let A = L + U + D; where L is lower trianglar, 723 U is upper triangular, E is diagonal; This routine applies 724 725 (L + E)^{-1} A (U + E)^{-1} 726 727 to a vector efficiently using Eisenstat's trick. This is for 728 the case of SSOR preconditioner, so E is D/omega where omega 729 is the relaxation factor. 730 */ 731 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 732 PLogObjectParent(matin,tt); 733 VecGetArray(tt,&t); 734 scale = (2.0/omega) - 1.0; 735 /* x = (E + U)^{-1} b */ 736 VecSet(&zero,mat->lvec); 737 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 738 mat->Mvctx); CHKERRQ(ierr); 739 for ( i=m-1; i>-1; i-- ) { 740 n = A->i[i+1] - diag[i] - 1; 741 idx = A->j + diag[i] + !shift; 742 v = A->a + diag[i] + !shift; 743 sum = b[i]; 744 SPARSEDENSEMDOT(sum,xs,v,idx,n); 745 d = fshift + A->a[diag[i]+shift]; 746 n = B->i[i+1] - B->i[i]; 747 idx = B->j + B->i[i] + shift; 748 v = B->a + B->i[i] + shift; 749 SPARSEDENSEMDOT(sum,ls,v,idx,n); 750 x[i] = omega*(sum/d); 751 } 752 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 753 mat->Mvctx); CHKERRQ(ierr); 754 755 /* t = b - (2*E - D)x */ 756 v = A->a; 757 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 758 759 /* t = (E + L)^{-1}t */ 760 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 761 diag = A->diag; 762 VecSet(&zero,mat->lvec); 763 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 764 mat->Mvctx); CHKERRQ(ierr); 765 for ( i=0; i<m; i++ ) { 766 n = diag[i] - A->i[i]; 767 idx = A->j + A->i[i] + shift; 768 v = A->a + A->i[i] + shift; 769 sum = t[i]; 770 SPARSEDENSEMDOT(sum,ts,v,idx,n); 771 d = fshift + A->a[diag[i]+shift]; 772 n = B->i[i+1] - B->i[i]; 773 idx = B->j + B->i[i] + shift; 774 v = B->a + B->i[i] + shift; 775 SPARSEDENSEMDOT(sum,ls,v,idx,n); 776 t[i] = omega*(sum/d); 777 } 778 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 779 mat->Mvctx); CHKERRQ(ierr); 780 /* x = x + t */ 781 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 782 VecDestroy(tt); 783 return 0; 784 } 785 786 787 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 788 if (flag & SOR_ZERO_INITIAL_GUESS) { 789 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 790 } 791 else { 792 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 793 CHKERRQ(ierr); 794 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 795 CHKERRQ(ierr); 796 } 797 while (its--) { 798 /* go down through the rows */ 799 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 800 mat->Mvctx); CHKERRQ(ierr); 801 for ( i=0; i<m; i++ ) { 802 n = A->i[i+1] - A->i[i]; 803 idx = A->j + A->i[i] + shift; 804 v = A->a + A->i[i] + shift; 805 sum = b[i]; 806 SPARSEDENSEMDOT(sum,xs,v,idx,n); 807 d = fshift + A->a[diag[i]+shift]; 808 n = B->i[i+1] - B->i[i]; 809 idx = B->j + B->i[i] + shift; 810 v = B->a + B->i[i] + shift; 811 SPARSEDENSEMDOT(sum,ls,v,idx,n); 812 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 813 } 814 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 815 mat->Mvctx); CHKERRQ(ierr); 816 /* come up through the rows */ 817 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 818 mat->Mvctx); CHKERRQ(ierr); 819 for ( i=m-1; i>-1; i-- ) { 820 n = A->i[i+1] - A->i[i]; 821 idx = A->j + A->i[i] + shift; 822 v = A->a + A->i[i] + shift; 823 sum = b[i]; 824 SPARSEDENSEMDOT(sum,xs,v,idx,n); 825 d = fshift + A->a[diag[i]+shift]; 826 n = B->i[i+1] - B->i[i]; 827 idx = B->j + B->i[i] + shift; 828 v = B->a + B->i[i] + shift; 829 SPARSEDENSEMDOT(sum,ls,v,idx,n); 830 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 831 } 832 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 833 mat->Mvctx); CHKERRQ(ierr); 834 } 835 } 836 else if (flag & SOR_FORWARD_SWEEP){ 837 if (flag & SOR_ZERO_INITIAL_GUESS) { 838 VecSet(&zero,mat->lvec); 839 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 840 mat->Mvctx); CHKERRQ(ierr); 841 for ( i=0; i<m; i++ ) { 842 n = diag[i] - A->i[i]; 843 idx = A->j + A->i[i] + shift; 844 v = A->a + A->i[i] + shift; 845 sum = b[i]; 846 SPARSEDENSEMDOT(sum,xs,v,idx,n); 847 d = fshift + A->a[diag[i]+shift]; 848 n = B->i[i+1] - B->i[i]; 849 idx = B->j + B->i[i] + shift; 850 v = B->a + B->i[i] + shift; 851 SPARSEDENSEMDOT(sum,ls,v,idx,n); 852 x[i] = omega*(sum/d); 853 } 854 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 855 mat->Mvctx); CHKERRQ(ierr); 856 its--; 857 } 858 while (its--) { 859 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 860 CHKERRQ(ierr); 861 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 862 CHKERRQ(ierr); 863 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 864 mat->Mvctx); CHKERRQ(ierr); 865 for ( i=0; i<m; i++ ) { 866 n = A->i[i+1] - A->i[i]; 867 idx = A->j + A->i[i] + shift; 868 v = A->a + A->i[i] + shift; 869 sum = b[i]; 870 SPARSEDENSEMDOT(sum,xs,v,idx,n); 871 d = fshift + A->a[diag[i]+shift]; 872 n = B->i[i+1] - B->i[i]; 873 idx = B->j + B->i[i] + shift; 874 v = B->a + B->i[i] + shift; 875 SPARSEDENSEMDOT(sum,ls,v,idx,n); 876 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 877 } 878 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 879 mat->Mvctx); CHKERRQ(ierr); 880 } 881 } 882 else if (flag & SOR_BACKWARD_SWEEP){ 883 if (flag & SOR_ZERO_INITIAL_GUESS) { 884 VecSet(&zero,mat->lvec); 885 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 886 mat->Mvctx); CHKERRQ(ierr); 887 for ( i=m-1; i>-1; i-- ) { 888 n = A->i[i+1] - diag[i] - 1; 889 idx = A->j + diag[i] + !shift; 890 v = A->a + diag[i] + !shift; 891 sum = b[i]; 892 SPARSEDENSEMDOT(sum,xs,v,idx,n); 893 d = fshift + A->a[diag[i]+shift]; 894 n = B->i[i+1] - B->i[i]; 895 idx = B->j + B->i[i] + shift; 896 v = B->a + B->i[i] + shift; 897 SPARSEDENSEMDOT(sum,ls,v,idx,n); 898 x[i] = omega*(sum/d); 899 } 900 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 901 mat->Mvctx); CHKERRQ(ierr); 902 its--; 903 } 904 while (its--) { 905 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 906 mat->Mvctx); CHKERRQ(ierr); 907 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 908 mat->Mvctx); CHKERRQ(ierr); 909 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 910 mat->Mvctx); CHKERRQ(ierr); 911 for ( i=m-1; i>-1; i-- ) { 912 n = A->i[i+1] - A->i[i]; 913 idx = A->j + A->i[i] + shift; 914 v = A->a + A->i[i] + shift; 915 sum = b[i]; 916 SPARSEDENSEMDOT(sum,xs,v,idx,n); 917 d = fshift + A->a[diag[i]+shift]; 918 n = B->i[i+1] - B->i[i]; 919 idx = B->j + B->i[i] + shift; 920 v = B->a + B->i[i] + shift; 921 SPARSEDENSEMDOT(sum,ls,v,idx,n); 922 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 923 } 924 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 925 mat->Mvctx); CHKERRQ(ierr); 926 } 927 } 928 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 929 if (flag & SOR_ZERO_INITIAL_GUESS) { 930 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 931 } 932 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 933 CHKERRQ(ierr); 934 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 935 CHKERRQ(ierr); 936 while (its--) { 937 /* go down through the rows */ 938 for ( i=0; i<m; i++ ) { 939 n = A->i[i+1] - A->i[i]; 940 idx = A->j + A->i[i] + shift; 941 v = A->a + A->i[i] + shift; 942 sum = b[i]; 943 SPARSEDENSEMDOT(sum,xs,v,idx,n); 944 d = fshift + A->a[diag[i]+shift]; 945 n = B->i[i+1] - B->i[i]; 946 idx = B->j + B->i[i] + shift; 947 v = B->a + B->i[i] + shift; 948 SPARSEDENSEMDOT(sum,ls,v,idx,n); 949 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 950 } 951 /* come up through the rows */ 952 for ( i=m-1; i>-1; i-- ) { 953 n = A->i[i+1] - A->i[i]; 954 idx = A->j + A->i[i] + shift; 955 v = A->a + A->i[i] + shift; 956 sum = b[i]; 957 SPARSEDENSEMDOT(sum,xs,v,idx,n); 958 d = fshift + A->a[diag[i]+shift]; 959 n = B->i[i+1] - B->i[i]; 960 idx = B->j + B->i[i] + shift; 961 v = B->a + B->i[i] + shift; 962 SPARSEDENSEMDOT(sum,ls,v,idx,n); 963 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 964 } 965 } 966 } 967 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 968 if (flag & SOR_ZERO_INITIAL_GUESS) { 969 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 970 } 971 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 972 CHKERRQ(ierr); 973 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 974 CHKERRQ(ierr); 975 while (its--) { 976 for ( i=0; i<m; i++ ) { 977 n = A->i[i+1] - A->i[i]; 978 idx = A->j + A->i[i] + shift; 979 v = A->a + A->i[i] + shift; 980 sum = b[i]; 981 SPARSEDENSEMDOT(sum,xs,v,idx,n); 982 d = fshift + A->a[diag[i]+shift]; 983 n = B->i[i+1] - B->i[i]; 984 idx = B->j + B->i[i] + shift; 985 v = B->a + B->i[i] + shift; 986 SPARSEDENSEMDOT(sum,ls,v,idx,n); 987 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 988 } 989 } 990 } 991 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 992 if (flag & SOR_ZERO_INITIAL_GUESS) { 993 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 994 } 995 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 996 mat->Mvctx); CHKERRQ(ierr); 997 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 998 mat->Mvctx); CHKERRQ(ierr); 999 while (its--) { 1000 for ( i=m-1; i>-1; i-- ) { 1001 n = A->i[i+1] - A->i[i]; 1002 idx = A->j + A->i[i] + shift; 1003 v = A->a + A->i[i] + shift; 1004 sum = b[i]; 1005 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1006 d = fshift + A->a[diag[i]+shift]; 1007 n = B->i[i+1] - B->i[i]; 1008 idx = B->j + B->i[i] + shift; 1009 v = B->a + B->i[i] + shift; 1010 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1011 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1012 } 1013 } 1014 } 1015 return 0; 1016 } 1017 1018 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1019 int *nzalloc,int *mem) 1020 { 1021 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1022 Mat A = mat->A, B = mat->B; 1023 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1024 1025 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1026 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1027 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1028 if (flag == MAT_LOCAL) { 1029 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1030 } else if (flag == MAT_GLOBAL_MAX) { 1031 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1032 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1033 } else if (flag == MAT_GLOBAL_SUM) { 1034 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1035 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1036 } 1037 return 0; 1038 } 1039 1040 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1041 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1042 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1043 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1044 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1045 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1046 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1047 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1048 1049 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1050 { 1051 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1052 1053 if (op == NO_NEW_NONZERO_LOCATIONS || 1054 op == YES_NEW_NONZERO_LOCATIONS || 1055 op == COLUMNS_SORTED || 1056 op == ROW_ORIENTED) { 1057 MatSetOption(a->A,op); 1058 MatSetOption(a->B,op); 1059 } 1060 else if (op == ROWS_SORTED || 1061 op == SYMMETRIC_MATRIX || 1062 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1063 op == YES_NEW_DIAGONALS) 1064 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1065 else if (op == COLUMN_ORIENTED) { 1066 a->roworiented = 0; 1067 MatSetOption(a->A,op); 1068 MatSetOption(a->B,op); 1069 } 1070 else if (op == NO_NEW_DIAGONALS) 1071 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1072 else 1073 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1074 return 0; 1075 } 1076 1077 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1078 { 1079 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080 *m = mat->M; *n = mat->N; 1081 return 0; 1082 } 1083 1084 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1085 { 1086 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1087 *m = mat->m; *n = mat->N; 1088 return 0; 1089 } 1090 1091 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1092 { 1093 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1094 *m = mat->rstart; *n = mat->rend; 1095 return 0; 1096 } 1097 1098 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1099 { 1100 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1101 Scalar *vworkA, *vworkB, **pvA, **pvB; 1102 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1103 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1104 1105 if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix"); 1106 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1107 lrow = row - rstart; 1108 1109 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1110 if (!v) {pvA = 0; pvB = 0;} 1111 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1112 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1113 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1114 nztot = nzA + nzB; 1115 1116 if (v || idx) { 1117 if (nztot) { 1118 /* Sort by increasing column numbers, assuming A and B already sorted */ 1119 int imark; 1120 if (mat->assembled) { 1121 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1122 } 1123 if (v) { 1124 *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1125 for ( i=0; i<nzB; i++ ) { 1126 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1127 else break; 1128 } 1129 imark = i; 1130 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1131 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1132 } 1133 if (idx) { 1134 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1135 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1136 for ( i=0; i<nzB; i++ ) { 1137 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1138 else break; 1139 } 1140 imark = i; 1141 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1142 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1143 } 1144 } 1145 else {*idx = 0; *v=0;} 1146 } 1147 *nz = nztot; 1148 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1149 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1150 return 0; 1151 } 1152 1153 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1154 { 1155 if (idx) PetscFree(*idx); 1156 if (v) PetscFree(*v); 1157 return 0; 1158 } 1159 1160 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1161 { 1162 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1163 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1164 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1165 double sum = 0.0; 1166 Scalar *v; 1167 1168 if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1169 if (aij->size == 1) { 1170 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1171 } else { 1172 if (type == NORM_FROBENIUS) { 1173 v = amat->a; 1174 for (i=0; i<amat->nz; i++ ) { 1175 #if defined(PETSC_COMPLEX) 1176 sum += real(conj(*v)*(*v)); v++; 1177 #else 1178 sum += (*v)*(*v); v++; 1179 #endif 1180 } 1181 v = bmat->a; 1182 for (i=0; i<bmat->nz; i++ ) { 1183 #if defined(PETSC_COMPLEX) 1184 sum += real(conj(*v)*(*v)); v++; 1185 #else 1186 sum += (*v)*(*v); v++; 1187 #endif 1188 } 1189 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1190 *norm = sqrt(*norm); 1191 } 1192 else if (type == NORM_1) { /* max column norm */ 1193 double *tmp, *tmp2; 1194 int *jj, *garray = aij->garray; 1195 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1196 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1197 PetscMemzero(tmp,aij->N*sizeof(double)); 1198 *norm = 0.0; 1199 v = amat->a; jj = amat->j; 1200 for ( j=0; j<amat->nz; j++ ) { 1201 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1202 } 1203 v = bmat->a; jj = bmat->j; 1204 for ( j=0; j<bmat->nz; j++ ) { 1205 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1206 } 1207 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1208 for ( j=0; j<aij->N; j++ ) { 1209 if (tmp2[j] > *norm) *norm = tmp2[j]; 1210 } 1211 PetscFree(tmp); PetscFree(tmp2); 1212 } 1213 else if (type == NORM_INFINITY) { /* max row norm */ 1214 double ntemp = 0.0; 1215 for ( j=0; j<amat->m; j++ ) { 1216 v = amat->a + amat->i[j] + shift; 1217 sum = 0.0; 1218 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1219 sum += PetscAbsScalar(*v); v++; 1220 } 1221 v = bmat->a + bmat->i[j] + shift; 1222 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1223 sum += PetscAbsScalar(*v); v++; 1224 } 1225 if (sum > ntemp) ntemp = sum; 1226 } 1227 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1228 } 1229 else { 1230 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1231 } 1232 } 1233 return 0; 1234 } 1235 1236 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1237 { 1238 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1239 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1240 int ierr,shift = Aloc->indexshift; 1241 Mat B; 1242 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1243 Scalar *array; 1244 1245 if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1246 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1247 PETSC_NULL,&B); CHKERRQ(ierr); 1248 1249 /* copy over the A part */ 1250 Aloc = (Mat_SeqAIJ*) a->A->data; 1251 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1252 row = a->rstart; 1253 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1254 for ( i=0; i<m; i++ ) { 1255 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1256 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1257 } 1258 aj = Aloc->j; 1259 for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1260 1261 /* copy over the B part */ 1262 Aloc = (Mat_SeqAIJ*) a->B->data; 1263 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1264 row = a->rstart; 1265 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1266 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1267 for ( i=0; i<m; i++ ) { 1268 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1269 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1270 } 1271 PetscFree(ct); 1272 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1273 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1274 if (matout) { 1275 *matout = B; 1276 } else { 1277 /* This isn't really an in-place transpose .... but free data structures from a */ 1278 PetscFree(a->rowners); 1279 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1280 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1281 if (a->colmap) PetscFree(a->colmap); 1282 if (a->garray) PetscFree(a->garray); 1283 if (a->lvec) VecDestroy(a->lvec); 1284 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1285 PetscFree(a); 1286 PetscMemcpy(A,B,sizeof(struct _Mat)); 1287 PetscHeaderDestroy(B); 1288 } 1289 return 0; 1290 } 1291 1292 extern int MatPrintHelp_SeqAIJ(Mat); 1293 static int MatPrintHelp_MPIAIJ(Mat A) 1294 { 1295 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1296 1297 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1298 else return 0; 1299 } 1300 1301 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1302 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1303 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1304 /* -------------------------------------------------------------------*/ 1305 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1306 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1307 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1308 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1309 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1310 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1311 MatLUFactor_MPIAIJ,0, 1312 MatRelax_MPIAIJ, 1313 MatTranspose_MPIAIJ, 1314 MatGetInfo_MPIAIJ,0, 1315 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1316 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1317 0, 1318 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1319 MatGetReordering_MPIAIJ, 1320 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1321 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1322 MatILUFactorSymbolic_MPIAIJ,0, 1323 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1324 0,0,0, 1325 0,0,MatGetValues_MPIAIJ,0, 1326 MatPrintHelp_MPIAIJ, 1327 MatScale_MPIAIJ}; 1328 1329 /*@C 1330 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1331 (the default parallel PETSc format). 1332 1333 Input Parameters: 1334 . comm - MPI communicator 1335 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1336 . n - number of local columns (or PETSC_DECIDE to have calculated 1337 if N is given) 1338 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1339 . N - number of global columns (or PETSC_DECIDE to have calculated 1340 if n is given) 1341 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1342 (same for all local rows) 1343 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1344 or null (possibly different for each row). You must leave room 1345 for the diagonal entry even if it is zero. 1346 . o_nz - number of nonzeros per row in off-diagonal portion of local 1347 submatrix (same for all local rows). 1348 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1349 submatrix or null (possibly different for each row). 1350 1351 Output Parameter: 1352 . newmat - the matrix 1353 1354 Notes: 1355 The AIJ format (also called the Yale sparse matrix format or 1356 compressed row storage), is fully compatible with standard Fortran 77 1357 storage. That is, the stored row and column indices can begin at 1358 either one (as in Fortran) or zero. See the users manual for details. 1359 1360 The user MUST specify either the local or global matrix dimensions 1361 (possibly both). 1362 1363 Storage Information: 1364 For a square global matrix we define each processor's diagonal portion 1365 to be its local rows and the corresponding columns (a square submatrix); 1366 each processor's off-diagonal portion encompasses the remainder of the 1367 local matrix (a rectangular submatrix). 1368 1369 The user can specify preallocated storage for the diagonal part of 1370 the local submatrix with either d_nz or d_nnz (not both). Set 1371 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1372 memory allocation. Likewise, specify preallocated storage for the 1373 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1374 1375 By default, this format uses inodes (identical nodes) when possible. 1376 We search for consecutive rows with the same nonzero structure, thereby 1377 reusing matrix information to achieve increased efficiency. 1378 1379 Options Database Keys: 1380 $ -mat_aij_no_inode - Do not use inodes 1381 $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1382 1383 .keywords: matrix, aij, compressed row, sparse, parallel 1384 1385 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1386 @*/ 1387 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1388 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1389 { 1390 Mat mat; 1391 Mat_MPIAIJ *a; 1392 int ierr, i,sum[2],work[2]; 1393 1394 *newmat = 0; 1395 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1396 PLogObjectCreate(mat); 1397 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1398 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1399 mat->destroy = MatDestroy_MPIAIJ; 1400 mat->view = MatView_MPIAIJ; 1401 mat->factor = 0; 1402 1403 a->insertmode = NOT_SET_VALUES; 1404 MPI_Comm_rank(comm,&a->rank); 1405 MPI_Comm_size(comm,&a->size); 1406 1407 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1408 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1409 1410 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1411 work[0] = m; work[1] = n; 1412 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1413 if (M == PETSC_DECIDE) M = sum[0]; 1414 if (N == PETSC_DECIDE) N = sum[1]; 1415 } 1416 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1417 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1418 a->m = m; 1419 a->n = n; 1420 a->N = N; 1421 a->M = M; 1422 1423 /* build local table of row and column ownerships */ 1424 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1425 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1426 a->cowners = a->rowners + a->size + 1; 1427 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1428 a->rowners[0] = 0; 1429 for ( i=2; i<=a->size; i++ ) { 1430 a->rowners[i] += a->rowners[i-1]; 1431 } 1432 a->rstart = a->rowners[a->rank]; 1433 a->rend = a->rowners[a->rank+1]; 1434 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1435 a->cowners[0] = 0; 1436 for ( i=2; i<=a->size; i++ ) { 1437 a->cowners[i] += a->cowners[i-1]; 1438 } 1439 a->cstart = a->cowners[a->rank]; 1440 a->cend = a->cowners[a->rank+1]; 1441 1442 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1443 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1444 PLogObjectParent(mat,a->A); 1445 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1446 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1447 PLogObjectParent(mat,a->B); 1448 1449 /* build cache for off array entries formed */ 1450 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1451 a->colmap = 0; 1452 a->garray = 0; 1453 a->roworiented = 1; 1454 1455 /* stuff used for matrix vector multiply */ 1456 a->lvec = 0; 1457 a->Mvctx = 0; 1458 a->assembled = 0; 1459 1460 *newmat = mat; 1461 return 0; 1462 } 1463 1464 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1465 { 1466 Mat mat; 1467 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1468 int ierr, len,flg; 1469 1470 if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix"); 1471 *newmat = 0; 1472 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1473 PLogObjectCreate(mat); 1474 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1475 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1476 mat->destroy = MatDestroy_MPIAIJ; 1477 mat->view = MatView_MPIAIJ; 1478 mat->factor = matin->factor; 1479 1480 a->m = oldmat->m; 1481 a->n = oldmat->n; 1482 a->M = oldmat->M; 1483 a->N = oldmat->N; 1484 1485 a->assembled = 1; 1486 a->rstart = oldmat->rstart; 1487 a->rend = oldmat->rend; 1488 a->cstart = oldmat->cstart; 1489 a->cend = oldmat->cend; 1490 a->size = oldmat->size; 1491 a->rank = oldmat->rank; 1492 a->insertmode = NOT_SET_VALUES; 1493 1494 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1495 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1496 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1497 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1498 if (oldmat->colmap) { 1499 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1500 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1501 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1502 } else a->colmap = 0; 1503 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1504 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1505 PLogObjectMemory(mat,len*sizeof(int)); 1506 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1507 } else a->garray = 0; 1508 1509 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1510 PLogObjectParent(mat,a->lvec); 1511 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1512 PLogObjectParent(mat,a->Mvctx); 1513 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1514 PLogObjectParent(mat,a->A); 1515 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1516 PLogObjectParent(mat,a->B); 1517 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1518 if (flg) { 1519 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1520 } 1521 *newmat = mat; 1522 return 0; 1523 } 1524 1525 #include "sysio.h" 1526 1527 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1528 { 1529 Mat A; 1530 int i, nz, ierr, j,rstart, rend, fd; 1531 Scalar *vals,*svals; 1532 PetscObject vobj = (PetscObject) bview; 1533 MPI_Comm comm = vobj->comm; 1534 MPI_Status status; 1535 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1536 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1537 int tag = ((PetscObject)bview)->tag; 1538 1539 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1540 if (!rank) { 1541 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1542 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1543 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1544 } 1545 1546 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1547 M = header[1]; N = header[2]; 1548 /* determine ownership of all rows */ 1549 m = M/size + ((M % size) > rank); 1550 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1551 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1552 rowners[0] = 0; 1553 for ( i=2; i<=size; i++ ) { 1554 rowners[i] += rowners[i-1]; 1555 } 1556 rstart = rowners[rank]; 1557 rend = rowners[rank+1]; 1558 1559 /* distribute row lengths to all processors */ 1560 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1561 offlens = ourlens + (rend-rstart); 1562 if (!rank) { 1563 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1564 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1565 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1566 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1567 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1568 PetscFree(sndcounts); 1569 } 1570 else { 1571 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1572 } 1573 1574 if (!rank) { 1575 /* calculate the number of nonzeros on each processor */ 1576 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1577 PetscMemzero(procsnz,size*sizeof(int)); 1578 for ( i=0; i<size; i++ ) { 1579 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1580 procsnz[i] += rowlengths[j]; 1581 } 1582 } 1583 PetscFree(rowlengths); 1584 1585 /* determine max buffer needed and allocate it */ 1586 maxnz = 0; 1587 for ( i=0; i<size; i++ ) { 1588 maxnz = PetscMax(maxnz,procsnz[i]); 1589 } 1590 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1591 1592 /* read in my part of the matrix column indices */ 1593 nz = procsnz[0]; 1594 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1595 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1596 1597 /* read in every one elses and ship off */ 1598 for ( i=1; i<size; i++ ) { 1599 nz = procsnz[i]; 1600 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1601 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1602 } 1603 PetscFree(cols); 1604 } 1605 else { 1606 /* determine buffer space needed for message */ 1607 nz = 0; 1608 for ( i=0; i<m; i++ ) { 1609 nz += ourlens[i]; 1610 } 1611 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1612 1613 /* receive message of column indices*/ 1614 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1615 MPI_Get_count(&status,MPI_INT,&maxnz); 1616 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1617 } 1618 1619 /* loop over local rows, determining number of off diagonal entries */ 1620 PetscMemzero(offlens,m*sizeof(int)); 1621 jj = 0; 1622 for ( i=0; i<m; i++ ) { 1623 for ( j=0; j<ourlens[i]; j++ ) { 1624 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1625 jj++; 1626 } 1627 } 1628 1629 /* create our matrix */ 1630 for ( i=0; i<m; i++ ) { 1631 ourlens[i] -= offlens[i]; 1632 } 1633 if (type == MATMPIAIJ) { 1634 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1635 } 1636 else if (type == MATMPIROW) { 1637 ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1638 } 1639 A = *newmat; 1640 MatSetOption(A,COLUMNS_SORTED); 1641 for ( i=0; i<m; i++ ) { 1642 ourlens[i] += offlens[i]; 1643 } 1644 1645 if (!rank) { 1646 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1647 1648 /* read in my part of the matrix numerical values */ 1649 nz = procsnz[0]; 1650 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1651 1652 /* insert into matrix */ 1653 jj = rstart; 1654 smycols = mycols; 1655 svals = vals; 1656 for ( i=0; i<m; i++ ) { 1657 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1658 smycols += ourlens[i]; 1659 svals += ourlens[i]; 1660 jj++; 1661 } 1662 1663 /* read in other processors and ship out */ 1664 for ( i=1; i<size; i++ ) { 1665 nz = procsnz[i]; 1666 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1667 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1668 } 1669 PetscFree(procsnz); 1670 } 1671 else { 1672 /* receive numeric values */ 1673 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1674 1675 /* receive message of values*/ 1676 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1677 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1678 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1679 1680 /* insert into matrix */ 1681 jj = rstart; 1682 smycols = mycols; 1683 svals = vals; 1684 for ( i=0; i<m; i++ ) { 1685 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1686 smycols += ourlens[i]; 1687 svals += ourlens[i]; 1688 jj++; 1689 } 1690 } 1691 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1692 1693 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1694 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1695 return 0; 1696 } 1697