1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.130 1996/03/04 05:15:55 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 
if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 
178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 
  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* This code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<N; i++ ) { 374 svalues[starts[owner[i]]++] = rows[i]; 375 } 376 ISRestoreIndices(is,&rows); 377 378 starts[0] = 0; 379 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 380 count = 0; 381 for ( i=0; i<size; i++ ) { 382 if (procs[i]) { 383 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 384 } 385 } 386 PetscFree(starts); 387 388 base = owners[rank]; 389 390 /* wait on receives */ 391 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 392 source = lens + nrecvs; 393 count = nrecvs; slen = 0; 394 while (count) { 395 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 396 /* unpack receives into our local space */ 397 MPI_Get_count(&recv_status,MPI_INT,&n); 398 source[imdex] = recv_status.MPI_SOURCE; 399 lens[imdex] = n; 400 slen += n; 401 count--; 402 } 403 PetscFree(recv_waits); 404 405 /* move the data into the send scatter */ 406 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 407 count = 0; 408 for ( i=0; i<nrecvs; i++ ) { 409 values = rvalues + i*nmax; 410 for ( j=0; j<lens[i]; j++ ) { 411 lrows[count++] = values[j] - base; 412 } 413 } 414 PetscFree(rvalues); PetscFree(lens); 415 PetscFree(owner); PetscFree(nprocs); 416 417 /* actually zap the local rows */ 418 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 419 PLogObjectParent(A,istmp); 420 PetscFree(lrows); 421 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 422 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 423 ierr = ISDestroy(istmp); CHKERRQ(ierr); 424 425 /* wait on sends */ 426 if (nsends) { 427 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 428 CHKPTRQ(send_status); 429 MPI_Waitall(nsends,send_waits,send_status); 430 PetscFree(send_status); 431 } 432 PetscFree(send_waits); PetscFree(svalues); 433 434 return 0; 435 } 436 437 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 438 { 439 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 440 int ierr; 441 442 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 443 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 444 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 445 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 446 return 0; 447 } 448 449 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 450 { 451 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 452 int ierr; 453 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 455 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 456 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 457 return 0; 458 } 459 460 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 461 { 462 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 463 int ierr; 464 465 /* do nondiagonal part */ 466 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 467 /* send it on its way */ 468 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 469 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 470 /* do local part */ 471 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 472 /* receive remote parts: note this assumes the values are not actually */ 473 /* inserted in yy until the next line, which is true for my implementation*/ 474 /* but is not perhaps always true. 
*/ 475 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 476 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 477 return 0; 478 } 479 480 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 483 int ierr; 484 485 /* do nondiagonal part */ 486 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 487 /* send it on its way */ 488 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 489 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490 /* do local part */ 491 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 492 /* receive remote parts: note this assumes the values are not actually */ 493 /* inserted in yy until the next line, which is true for my implementation*/ 494 /* but is not perhaps always true. */ 495 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 496 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 497 return 0; 498 } 499 500 /* 501 This only works correctly for square matrices where the subblock A->A is the 502 diagonal block 503 */ 504 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 505 { 506 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 507 return MatGetDiagonal(a->A,v); 508 } 509 510 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 511 { 512 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 513 int ierr; 514 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 515 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 516 return 0; 517 } 518 519 static int MatDestroy_MPIAIJ(PetscObject obj) 520 { 521 Mat mat = (Mat) obj; 522 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 523 int ierr; 524 #if defined(PETSC_LOG) 525 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 526 #endif 527 PetscFree(aij->rowners); 528 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 529 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 530 if (aij->colmap) PetscFree(aij->colmap); 531 if (aij->garray) PetscFree(aij->garray); 532 if (aij->lvec) VecDestroy(aij->lvec); 533 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 534 if (aij->rowvalues) PetscFree(aij->rowvalues); 535 PetscFree(aij); 536 PLogObjectDestroy(mat); 537 PetscHeaderDestroy(mat); 538 return 0; 539 } 540 #include "draw.h" 541 #include "pinclude/pviewer.h" 542 543 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 544 { 545 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 546 int ierr; 547 548 if (aij->size == 1) { 549 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 550 } 551 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 552 return 0; 553 } 554 555 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 556 { 557 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 558 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 559 int ierr, format,shift = C->indexshift,rank; 560 PetscObject vobj = (PetscObject) viewer; 561 FILE *fd; 562 563 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 564 ierr = ViewerFileGetFormat_Private(viewer,&format); 565 if (format == FILE_FORMAT_INFO_DETAILED) { 566 int nz, nzalloc, mem, flg; 567 MPI_Comm_rank(mat->comm,&rank); 568 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 569 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 570 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 571 MPIU_Seq_begin(mat->comm,1); 572 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 573 rank,aij->m,nz,nzalloc,mem); 574 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 575 rank,aij->m,nz,nzalloc,mem); 576 ierr = 
MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 577 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 578 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 579 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 580 fflush(fd); 581 MPIU_Seq_end(mat->comm,1); 582 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 583 return 0; 584 } 585 else if (format == FILE_FORMAT_INFO) { 586 return 0; 587 } 588 } 589 590 if (vobj->type == ASCII_FILE_VIEWER) { 591 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 592 MPIU_Seq_begin(mat->comm,1); 593 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 594 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 595 aij->cend); 596 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 597 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 598 fflush(fd); 599 MPIU_Seq_end(mat->comm,1); 600 } 601 else { 602 int size = aij->size; 603 rank = aij->rank; 604 if (size == 1) { 605 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 606 } 607 else { 608 /* assemble the entire matrix onto first processor. */ 609 Mat A; 610 Mat_SeqAIJ *Aloc; 611 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 612 Scalar *a; 613 614 if (!rank) { 615 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 616 CHKERRQ(ierr); 617 } 618 else { 619 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 620 CHKERRQ(ierr); 621 } 622 PLogObjectParent(mat,A); 623 624 /* copy over the A part */ 625 Aloc = (Mat_SeqAIJ*) aij->A->data; 626 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 627 row = aij->rstart; 628 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 629 for ( i=0; i<m; i++ ) { 630 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 631 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 632 } 633 aj = Aloc->j; 634 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 635 636 /* copy over the B part */ 637 Aloc = (Mat_SeqAIJ*) aij->B->data; 638 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 639 row = aij->rstart; 640 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 641 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 642 for ( i=0; i<m; i++ ) { 643 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 644 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 645 } 646 PetscFree(ct); 647 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 648 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 649 if (!rank) { 650 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 651 } 652 ierr = MatDestroy(A); CHKERRQ(ierr); 653 } 654 } 655 return 0; 656 } 657 658 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 659 { 660 Mat mat = (Mat) obj; 661 int ierr; 662 PetscObject vobj = (PetscObject) viewer; 663 664 if (!viewer) { 665 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 666 } 667 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 668 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 669 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 670 } 671 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 672 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 673 } 674 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 675 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 676 } 677 else if (vobj->cookie == DRAW_COOKIE) { 
678 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 679 } 680 else if (vobj->type == BINARY_FILE_VIEWER) { 681 return MatView_MPIAIJ_Binary(mat,viewer); 682 } 683 return 0; 684 } 685 686 extern int MatMarkDiag_SeqAIJ(Mat); 687 /* 688 This has to provide several versions. 689 690 1) per sequential 691 2) a) use only local smoothing updating outer values only once. 692 b) local smoothing updating outer values each inner iteration 693 3) color updating out values betwen colors. 694 */ 695 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 696 double fshift,int its,Vec xx) 697 { 698 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 699 Mat AA = mat->A, BB = mat->B; 700 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 701 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 702 int ierr,*idx, *diag; 703 int n = mat->n, m = mat->m, i,shift = A->indexshift; 704 Vec tt; 705 706 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 707 xs = x + shift; /* shift by one for index start of 1 */ 708 ls = ls + shift; 709 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 710 diag = A->diag; 711 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 712 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 713 } 714 if (flag & SOR_EISENSTAT) { 715 /* Let A = L + U + D; where L is lower trianglar, 716 U is upper triangular, E is diagonal; This routine applies 717 718 (L + E)^{-1} A (U + E)^{-1} 719 720 to a vector efficiently using Eisenstat's trick. This is for 721 the case of SSOR preconditioner, so E is D/omega where omega 722 is the relaxation factor. 723 */ 724 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 725 PLogObjectParent(matin,tt); 726 VecGetArray(tt,&t); 727 scale = (2.0/omega) - 1.0; 728 /* x = (E + U)^{-1} b */ 729 VecSet(&zero,mat->lvec); 730 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 731 mat->Mvctx); CHKERRQ(ierr); 732 for ( i=m-1; i>-1; i-- ) { 733 n = A->i[i+1] - diag[i] - 1; 734 idx = A->j + diag[i] + !shift; 735 v = A->a + diag[i] + !shift; 736 sum = b[i]; 737 SPARSEDENSEMDOT(sum,xs,v,idx,n); 738 d = fshift + A->a[diag[i]+shift]; 739 n = B->i[i+1] - B->i[i]; 740 idx = B->j + B->i[i] + shift; 741 v = B->a + B->i[i] + shift; 742 SPARSEDENSEMDOT(sum,ls,v,idx,n); 743 x[i] = omega*(sum/d); 744 } 745 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 746 mat->Mvctx); CHKERRQ(ierr); 747 748 /* t = b - (2*E - D)x */ 749 v = A->a; 750 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 751 752 /* t = (E + L)^{-1}t */ 753 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 754 diag = A->diag; 755 VecSet(&zero,mat->lvec); 756 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 757 mat->Mvctx); CHKERRQ(ierr); 758 for ( i=0; i<m; i++ ) { 759 n = diag[i] - A->i[i]; 760 idx = A->j + A->i[i] + shift; 761 v = A->a + A->i[i] + shift; 762 sum = t[i]; 763 SPARSEDENSEMDOT(sum,ts,v,idx,n); 764 d = fshift + A->a[diag[i]+shift]; 765 n = B->i[i+1] - B->i[i]; 766 idx = B->j + B->i[i] + shift; 767 v = B->a + B->i[i] + shift; 768 SPARSEDENSEMDOT(sum,ls,v,idx,n); 769 t[i] = omega*(sum/d); 770 } 771 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 772 mat->Mvctx); CHKERRQ(ierr); 773 /* x = x + t */ 774 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 775 VecDestroy(tt); 776 return 0; 777 } 778 779 780 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 781 if (flag & SOR_ZERO_INITIAL_GUESS) { 782 VecSet(&zero,mat->lvec); 
VecSet(&zero,xx); 783 } 784 else { 785 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 786 CHKERRQ(ierr); 787 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 788 CHKERRQ(ierr); 789 } 790 while (its--) { 791 /* go down through the rows */ 792 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 793 mat->Mvctx); CHKERRQ(ierr); 794 for ( i=0; i<m; i++ ) { 795 n = A->i[i+1] - A->i[i]; 796 idx = A->j + A->i[i] + shift; 797 v = A->a + A->i[i] + shift; 798 sum = b[i]; 799 SPARSEDENSEMDOT(sum,xs,v,idx,n); 800 d = fshift + A->a[diag[i]+shift]; 801 n = B->i[i+1] - B->i[i]; 802 idx = B->j + B->i[i] + shift; 803 v = B->a + B->i[i] + shift; 804 SPARSEDENSEMDOT(sum,ls,v,idx,n); 805 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 806 } 807 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 808 mat->Mvctx); CHKERRQ(ierr); 809 /* come up through the rows */ 810 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 811 mat->Mvctx); CHKERRQ(ierr); 812 for ( i=m-1; i>-1; i-- ) { 813 n = A->i[i+1] - A->i[i]; 814 idx = A->j + A->i[i] + shift; 815 v = A->a + A->i[i] + shift; 816 sum = b[i]; 817 SPARSEDENSEMDOT(sum,xs,v,idx,n); 818 d = fshift + A->a[diag[i]+shift]; 819 n = B->i[i+1] - B->i[i]; 820 idx = B->j + B->i[i] + shift; 821 v = B->a + B->i[i] + shift; 822 SPARSEDENSEMDOT(sum,ls,v,idx,n); 823 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 824 } 825 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 826 mat->Mvctx); CHKERRQ(ierr); 827 } 828 } 829 else if (flag & SOR_FORWARD_SWEEP){ 830 if (flag & SOR_ZERO_INITIAL_GUESS) { 831 VecSet(&zero,mat->lvec); 832 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 833 mat->Mvctx); CHKERRQ(ierr); 834 for ( i=0; i<m; i++ ) { 835 n = diag[i] - A->i[i]; 836 idx = A->j + A->i[i] + shift; 837 v = A->a + A->i[i] + shift; 838 sum = b[i]; 839 SPARSEDENSEMDOT(sum,xs,v,idx,n); 840 d = fshift + A->a[diag[i]+shift]; 841 n = B->i[i+1] - B->i[i]; 842 idx = B->j + B->i[i] + shift; 843 v = B->a + B->i[i] + shift; 844 SPARSEDENSEMDOT(sum,ls,v,idx,n); 845 x[i] = omega*(sum/d); 846 } 847 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 848 mat->Mvctx); CHKERRQ(ierr); 849 its--; 850 } 851 while (its--) { 852 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 853 CHKERRQ(ierr); 854 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 855 CHKERRQ(ierr); 856 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 857 mat->Mvctx); CHKERRQ(ierr); 858 for ( i=0; i<m; i++ ) { 859 n = A->i[i+1] - A->i[i]; 860 idx = A->j + A->i[i] + shift; 861 v = A->a + A->i[i] + shift; 862 sum = b[i]; 863 SPARSEDENSEMDOT(sum,xs,v,idx,n); 864 d = fshift + A->a[diag[i]+shift]; 865 n = B->i[i+1] - B->i[i]; 866 idx = B->j + B->i[i] + shift; 867 v = B->a + B->i[i] + shift; 868 SPARSEDENSEMDOT(sum,ls,v,idx,n); 869 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 870 } 871 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 872 mat->Mvctx); CHKERRQ(ierr); 873 } 874 } 875 else if (flag & SOR_BACKWARD_SWEEP){ 876 if (flag & SOR_ZERO_INITIAL_GUESS) { 877 VecSet(&zero,mat->lvec); 878 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 879 mat->Mvctx); CHKERRQ(ierr); 880 for ( i=m-1; i>-1; i-- ) { 881 n = A->i[i+1] - diag[i] - 1; 882 idx = A->j + diag[i] + !shift; 883 v = A->a + diag[i] + !shift; 884 sum = b[i]; 885 SPARSEDENSEMDOT(sum,xs,v,idx,n); 886 d = fshift + A->a[diag[i]+shift]; 887 n = B->i[i+1] - B->i[i]; 888 idx = B->j + B->i[i] + shift; 889 v = B->a + B->i[i] + shift; 890 SPARSEDENSEMDOT(sum,ls,v,idx,n); 891 x[i] = omega*(sum/d); 892 } 893 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 894 mat->Mvctx); CHKERRQ(ierr); 895 its--; 896 } 897 while (its--) { 898 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 899 mat->Mvctx); CHKERRQ(ierr); 900 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 901 mat->Mvctx); CHKERRQ(ierr); 902 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 903 mat->Mvctx); CHKERRQ(ierr); 904 for ( i=m-1; i>-1; i-- ) { 905 n = A->i[i+1] - A->i[i]; 906 idx = A->j + A->i[i] + shift; 907 v = A->a + A->i[i] + shift; 908 sum = b[i]; 909 SPARSEDENSEMDOT(sum,xs,v,idx,n); 910 d = fshift + A->a[diag[i]+shift]; 911 n = B->i[i+1] - B->i[i]; 912 idx = B->j + B->i[i] + shift; 913 v = B->a + B->i[i] + shift; 914 SPARSEDENSEMDOT(sum,ls,v,idx,n); 915 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 916 } 917 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 918 mat->Mvctx); CHKERRQ(ierr); 919 } 920 } 921 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 922 if (flag & SOR_ZERO_INITIAL_GUESS) { 923 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 924 } 925 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 926 CHKERRQ(ierr); 927 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 928 CHKERRQ(ierr); 929 while (its--) { 930 /* go down through the rows */ 931 for ( i=0; i<m; i++ ) { 932 n = A->i[i+1] - A->i[i]; 933 idx = A->j + A->i[i] + shift; 934 v = A->a + A->i[i] + shift; 935 sum = b[i]; 936 SPARSEDENSEMDOT(sum,xs,v,idx,n); 937 d = fshift + A->a[diag[i]+shift]; 938 n = B->i[i+1] - B->i[i]; 939 idx = B->j + B->i[i] + shift; 940 v = B->a + B->i[i] + shift; 941 SPARSEDENSEMDOT(sum,ls,v,idx,n); 942 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 943 } 944 /* come up through the rows */ 945 for ( i=m-1; i>-1; i-- ) { 946 n = A->i[i+1] - A->i[i]; 947 idx = A->j + A->i[i] + shift; 948 v = A->a + A->i[i] + shift; 949 sum = b[i]; 950 SPARSEDENSEMDOT(sum,xs,v,idx,n); 951 d = fshift + A->a[diag[i]+shift]; 952 n = B->i[i+1] - B->i[i]; 953 idx = B->j + B->i[i] + shift; 954 v = B->a + B->i[i] + shift; 955 SPARSEDENSEMDOT(sum,ls,v,idx,n); 956 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 957 } 958 } 959 } 960 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 961 if (flag & SOR_ZERO_INITIAL_GUESS) { 962 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 963 } 964 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 965 CHKERRQ(ierr); 966 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 967 CHKERRQ(ierr); 968 while (its--) { 969 for ( i=0; i<m; i++ ) { 970 n = A->i[i+1] - A->i[i]; 971 idx = A->j + A->i[i] + shift; 972 v = A->a + A->i[i] + shift; 973 sum = b[i]; 974 SPARSEDENSEMDOT(sum,xs,v,idx,n); 975 d = fshift + A->a[diag[i]+shift]; 976 n = B->i[i+1] - B->i[i]; 977 idx = B->j + B->i[i] + shift; 978 v = B->a + B->i[i] + shift; 979 SPARSEDENSEMDOT(sum,ls,v,idx,n); 980 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 981 } 982 } 983 } 984 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 987 } 988 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 989 mat->Mvctx); CHKERRQ(ierr); 990 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 991 mat->Mvctx); CHKERRQ(ierr); 992 while (its--) { 993 for ( i=m-1; i>-1; i-- ) { 994 n = A->i[i+1] - A->i[i]; 995 idx = A->j + A->i[i] + shift; 996 v = A->a + A->i[i] + shift; 997 sum = b[i]; 998 SPARSEDENSEMDOT(sum,xs,v,idx,n); 999 d = fshift + A->a[diag[i]+shift]; 1000 n = B->i[i+1] - B->i[i]; 1001 idx = B->j + B->i[i] + shift; 1002 v = B->a + B->i[i] + shift; 1003 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1004 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1005 } 1006 } 1007 } 1008 return 0; 1009 } 1010 1011 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1012 int *nzalloc,int *mem) 1013 { 1014 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1015 Mat A = mat->A, B = mat->B; 1016 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1017 1018 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1019 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1020 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1021 if (flag == MAT_LOCAL) { 1022 if (nz) *nz = isend[0]; 1023 if (nzalloc) *nzalloc = isend[1]; 1024 if (mem) *mem = isend[2]; 1025 } else if (flag == MAT_GLOBAL_MAX) { 1026 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1027 if (nz) *nz = irecv[0]; 1028 if (nzalloc) *nzalloc = irecv[1]; 1029 if (mem) *mem = irecv[2]; 1030 } else if (flag == MAT_GLOBAL_SUM) { 1031 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1032 if (nz) *nz = irecv[0]; 1033 if (nzalloc) *nzalloc = irecv[1]; 1034 if (mem) *mem = irecv[2]; 1035 } 1036 return 0; 1037 } 1038 1039 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1040 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1041 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1042 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1043 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1044 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1045 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1046 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1047 1048 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1051 1052 if (op == NO_NEW_NONZERO_LOCATIONS || 1053 op == YES_NEW_NONZERO_LOCATIONS || 1054 op == COLUMNS_SORTED || 1055 op == ROW_ORIENTED) { 1056 MatSetOption(a->A,op); 1057 MatSetOption(a->B,op); 1058 } 1059 else if (op == ROWS_SORTED || 1060 op == SYMMETRIC_MATRIX || 1061 op == 
STRUCTURALLY_SYMMETRIC_MATRIX || 1062 op == YES_NEW_DIAGONALS) 1063 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1064 else if (op == COLUMN_ORIENTED) { 1065 a->roworiented = 0; 1066 MatSetOption(a->A,op); 1067 MatSetOption(a->B,op); 1068 } 1069 else if (op == NO_NEW_DIAGONALS) 1070 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1071 else 1072 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1073 return 0; 1074 } 1075 1076 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1077 { 1078 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1079 *m = mat->M; *n = mat->N; 1080 return 0; 1081 } 1082 1083 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1084 { 1085 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1086 *m = mat->m; *n = mat->N; 1087 return 0; 1088 } 1089 1090 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1091 { 1092 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1093 *m = mat->rstart; *n = mat->rend; 1094 return 0; 1095 } 1096 1097 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1098 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1099 1100 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1101 { 1102 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1103 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1104 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1105 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1106 int *cmap, *idx_p; 1107 1108 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1109 mat->getrowactive = PETSC_TRUE; 1110 1111 if (!mat->rowvalues && (idx || v)) { 1112 /* 1113 allocate enough space to hold information from the longest row. 1114 */ 1115 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1116 int max = 1,n = mat->n,tmp; 1117 for ( i=0; i<n; i++ ) { 1118 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1119 if (max < tmp) { max = tmp; } 1120 } 1121 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1122 CHKPTRQ(mat->rowvalues); 1123 mat->rowindices = (int *) (mat->rowvalues + max); 1124 } 1125 1126 1127 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1128 lrow = row - rstart; 1129 1130 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1131 if (!v) {pvA = 0; pvB = 0;} 1132 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1133 ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1134 ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1135 nztot = nzA + nzB; 1136 1137 cmap = mat->garray; 1138 if (v || idx) { 1139 if (nztot) { 1140 /* Sort by increasing column numbers, assuming A and B already sorted */ 1141 int imark = -1; 1142 if (v) { 1143 *v = v_p = mat->rowvalues; 1144 for ( i=0; i<nzB; i++ ) { 1145 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1146 else break; 1147 } 1148 imark = i; 1149 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1150 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1151 } 1152 if (idx) { 1153 *idx = idx_p = mat->rowindices; 1154 if (imark > -1) { 1155 for ( i=0; i<imark; i++ ) { 1156 idx_p[i] = cmap[cworkB[i]]; 1157 } 1158 } else { 1159 for ( i=0; i<nzB; i++ ) { 1160 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1161 else break; 1162 } 1163 imark = i; 1164 } 1165 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1166 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1167 } 1168 } 1169 else {*idx = 0; 
*v=0;} 1170 } 1171 *nz = nztot; 1172 ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1173 ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1174 return 0; 1175 } 1176 1177 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1178 { 1179 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1180 if (aij->getrowactive == PETSC_FALSE) { 1181 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1182 } 1183 aij->getrowactive = PETSC_FALSE; 1184 return 0; 1185 } 1186 1187 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1190 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1191 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1192 double sum = 0.0; 1193 Scalar *v; 1194 1195 if (aij->size == 1) { 1196 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1197 } else { 1198 if (type == NORM_FROBENIUS) { 1199 v = amat->a; 1200 for (i=0; i<amat->nz; i++ ) { 1201 #if defined(PETSC_COMPLEX) 1202 sum += real(conj(*v)*(*v)); v++; 1203 #else 1204 sum += (*v)*(*v); v++; 1205 #endif 1206 } 1207 v = bmat->a; 1208 for (i=0; i<bmat->nz; i++ ) { 1209 #if defined(PETSC_COMPLEX) 1210 sum += real(conj(*v)*(*v)); v++; 1211 #else 1212 sum += (*v)*(*v); v++; 1213 #endif 1214 } 1215 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1216 *norm = sqrt(*norm); 1217 } 1218 else if (type == NORM_1) { /* max column norm */ 1219 double *tmp, *tmp2; 1220 int *jj, *garray = aij->garray; 1221 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1222 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1223 PetscMemzero(tmp,aij->N*sizeof(double)); 1224 *norm = 0.0; 1225 v = amat->a; jj = amat->j; 1226 for ( j=0; j<amat->nz; j++ ) { 1227 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1228 } 1229 v = bmat->a; jj = bmat->j; 1230 for ( j=0; j<bmat->nz; j++ ) { 1231 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1232 } 1233 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1234 for ( j=0; j<aij->N; j++ ) { 1235 if (tmp2[j] > *norm) *norm = tmp2[j]; 1236 } 1237 PetscFree(tmp); PetscFree(tmp2); 1238 } 1239 else if (type == NORM_INFINITY) { /* max row norm */ 1240 double ntemp = 0.0; 1241 for ( j=0; j<amat->m; j++ ) { 1242 v = amat->a + amat->i[j] + shift; 1243 sum = 0.0; 1244 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1245 sum += PetscAbsScalar(*v); v++; 1246 } 1247 v = bmat->a + bmat->i[j] + shift; 1248 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1249 sum += PetscAbsScalar(*v); v++; 1250 } 1251 if (sum > ntemp) ntemp = sum; 1252 } 1253 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1254 } 1255 else { 1256 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1257 } 1258 } 1259 return 0; 1260 } 1261 1262 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1263 { 1264 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1265 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1266 int ierr,shift = Aloc->indexshift; 1267 Mat B; 1268 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1269 Scalar *array; 1270 1271 if (matout == PETSC_NULL && M != N) 1272 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1273 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1274 PETSC_NULL,&B); CHKERRQ(ierr); 1275 1276 /* copy over the A part */ 1277 Aloc = (Mat_SeqAIJ*) a->A->data; 1278 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1279 row = a->rstart; 1280 for ( i=0; 
i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1281 for ( i=0; i<m; i++ ) { 1282 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1283 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1284 } 1285 aj = Aloc->j; 1286 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1287 1288 /* copy over the B part */ 1289 Aloc = (Mat_SeqAIJ*) a->B->data; 1290 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1291 row = a->rstart; 1292 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1293 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1294 for ( i=0; i<m; i++ ) { 1295 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1296 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1297 } 1298 PetscFree(ct); 1299 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1300 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1301 if (matout != PETSC_NULL) { 1302 *matout = B; 1303 } else { 1304 /* This isn't really an in-place transpose .... but free data structures from a */ 1305 PetscFree(a->rowners); 1306 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1307 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1308 if (a->colmap) PetscFree(a->colmap); 1309 if (a->garray) PetscFree(a->garray); 1310 if (a->lvec) VecDestroy(a->lvec); 1311 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1312 PetscFree(a); 1313 PetscMemcpy(A,B,sizeof(struct _Mat)); 1314 PetscHeaderDestroy(B); 1315 } 1316 return 0; 1317 } 1318 1319 extern int MatPrintHelp_SeqAIJ(Mat); 1320 static int MatPrintHelp_MPIAIJ(Mat A) 1321 { 1322 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1323 1324 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1325 else return 0; 1326 } 1327 1328 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1329 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1330 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1331 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1332 /* -------------------------------------------------------------------*/ 1333 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1334 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1335 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1336 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1337 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1338 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1339 MatLUFactor_MPIAIJ,0, 1340 MatRelax_MPIAIJ, 1341 MatTranspose_MPIAIJ, 1342 MatGetInfo_MPIAIJ,0, 1343 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1344 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1345 0, 1346 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1347 MatGetReordering_MPIAIJ, 1348 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1349 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1350 MatILUFactorSymbolic_MPIAIJ,0, 1351 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1352 0,0,0, 1353 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1354 MatPrintHelp_MPIAIJ, 1355 MatScale_MPIAIJ}; 1356 1357 /*@C 1358 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1359 (the default parallel PETSc format). For good matrix assembly performance 1360 the user should preallocate the matrix storage by setting the parameters 1361 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1362 performance can be increased by more than a factor of 50. 1363 1364 Input Parameters: 1365 . comm - MPI communicator 1366 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
      if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
      if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
      (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
      or null (possibly different for each row). You must leave room
      for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
      submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
      submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For additional details, see the users manual
   chapter on matrices and the file $(PETSC_DIR)/Performance.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = 0;
  mat->assembled = PETSC_FALSE;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap      = 0;
  a->garray      = 0;
  a->roworiented = 1;

  /* stuff used for matrix vector multiply */
  a->lvec  = 0;
  a->Mvctx = 0;

  /* stuff for MatGetRow() */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  *newmat = mat;
  return 0;
}
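/*
   Illustrative sketch only (guarded with #if 0 so it is never compiled):
   one plausible way to call MatCreateMPIAIJ() with per-row preallocation,
   following the storage description in the manual page above.  The helper
   name MatCreateMPIAIJExample, the local size of 3 rows/columns, and the
   nonzero counts are invented for the example; they are not part of the
   library.
*/
#if 0
static int MatCreateMPIAIJExample(MPI_Comm comm)
{
  Mat    C;
  int    ierr,i,rstart,rend,m = 3,n = 3;
  int    d_nnz[3],o_nnz[3];
  Scalar one = 1.0;

  /* guess at most 3 nonzeros per row in the diagonal block and 2 in the
     off-diagonal block; pass 0 for d_nz/o_nz since the arrays are used */
  for ( i=0; i<3; i++ ) { d_nnz[i] = 3; o_nnz[i] = 2; }

  /* local sizes are given, so the global sizes may be left to PETSc */
  ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DECIDE,PETSC_DECIDE,
                         0,d_nnz,0,o_nnz,&C); CHKERRQ(ierr);

  /* insert the locally owned diagonal entries, then assemble */
  ierr = MatGetOwnershipRange(C,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {
    ierr = MatSetValues(C,1,&i,1,&i,&one,INSERT_VALUES); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatDestroy(C); CHKERRQ(ierr);
  return 0;
}
#endif
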
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len,flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    =
MatView_MPIAIJ; 1539 mat->factor = matin->factor; 1540 mat->assembled = PETSC_TRUE; 1541 1542 a->m = oldmat->m; 1543 a->n = oldmat->n; 1544 a->M = oldmat->M; 1545 a->N = oldmat->N; 1546 1547 a->rstart = oldmat->rstart; 1548 a->rend = oldmat->rend; 1549 a->cstart = oldmat->cstart; 1550 a->cend = oldmat->cend; 1551 a->size = oldmat->size; 1552 a->rank = oldmat->rank; 1553 a->insertmode = NOT_SET_VALUES; 1554 a->rowvalues = 0; 1555 a->getrowactive = PETSC_FALSE; 1556 1557 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1558 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1559 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1560 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1561 if (oldmat->colmap) { 1562 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1563 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1564 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1565 } else a->colmap = 0; 1566 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1567 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1568 PLogObjectMemory(mat,len*sizeof(int)); 1569 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1570 } else a->garray = 0; 1571 1572 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1573 PLogObjectParent(mat,a->lvec); 1574 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1575 PLogObjectParent(mat,a->Mvctx); 1576 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1577 PLogObjectParent(mat,a->A); 1578 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1579 PLogObjectParent(mat,a->B); 1580 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1581 if (flg) { 1582 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1583 } 1584 *newmat = mat; 1585 return 0; 1586 } 1587 1588 #include "sysio.h" 1589 1590 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1591 { 1592 Mat A; 1593 int i, nz, ierr, j,rstart, rend, fd; 1594 Scalar *vals,*svals; 1595 PetscObject vobj = (PetscObject) bview; 1596 MPI_Comm comm = vobj->comm; 1597 MPI_Status status; 1598 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1599 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1600 int tag = ((PetscObject)bview)->tag; 1601 1602 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1603 if (!rank) { 1604 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1605 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1606 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1607 } 1608 1609 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1610 M = header[1]; N = header[2]; 1611 /* determine ownership of all rows */ 1612 m = M/size + ((M % size) > rank); 1613 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1614 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1615 rowners[0] = 0; 1616 for ( i=2; i<=size; i++ ) { 1617 rowners[i] += rowners[i-1]; 1618 } 1619 rstart = rowners[rank]; 1620 rend = rowners[rank+1]; 1621 1622 /* distribute row lengths to all processors */ 1623 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1624 offlens = ourlens + (rend-rstart); 1625 if (!rank) { 1626 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1627 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1628 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1629 for ( i=0; i<size; i++ 
) sndcounts[i] = rowners[i+1] - rowners[i]; 1630 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1631 PetscFree(sndcounts); 1632 } 1633 else { 1634 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1635 } 1636 1637 if (!rank) { 1638 /* calculate the number of nonzeros on each processor */ 1639 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1640 PetscMemzero(procsnz,size*sizeof(int)); 1641 for ( i=0; i<size; i++ ) { 1642 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1643 procsnz[i] += rowlengths[j]; 1644 } 1645 } 1646 PetscFree(rowlengths); 1647 1648 /* determine max buffer needed and allocate it */ 1649 maxnz = 0; 1650 for ( i=0; i<size; i++ ) { 1651 maxnz = PetscMax(maxnz,procsnz[i]); 1652 } 1653 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1654 1655 /* read in my part of the matrix column indices */ 1656 nz = procsnz[0]; 1657 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1658 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1659 1660 /* read in every one elses and ship off */ 1661 for ( i=1; i<size; i++ ) { 1662 nz = procsnz[i]; 1663 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1664 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1665 } 1666 PetscFree(cols); 1667 } 1668 else { 1669 /* determine buffer space needed for message */ 1670 nz = 0; 1671 for ( i=0; i<m; i++ ) { 1672 nz += ourlens[i]; 1673 } 1674 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1675 1676 /* receive message of column indices*/ 1677 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1678 MPI_Get_count(&status,MPI_INT,&maxnz); 1679 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1680 } 1681 1682 /* loop over local rows, determining number of off diagonal entries */ 1683 PetscMemzero(offlens,m*sizeof(int)); 1684 jj = 0; 1685 for ( i=0; i<m; i++ ) { 1686 for ( j=0; j<ourlens[i]; j++ ) { 1687 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1688 jj++; 1689 } 1690 } 1691 1692 /* create our matrix */ 1693 for ( i=0; i<m; i++ ) { 1694 ourlens[i] -= offlens[i]; 1695 } 1696 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1697 A = *newmat; 1698 MatSetOption(A,COLUMNS_SORTED); 1699 for ( i=0; i<m; i++ ) { 1700 ourlens[i] += offlens[i]; 1701 } 1702 1703 if (!rank) { 1704 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1705 1706 /* read in my part of the matrix numerical values */ 1707 nz = procsnz[0]; 1708 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1709 1710 /* insert into matrix */ 1711 jj = rstart; 1712 smycols = mycols; 1713 svals = vals; 1714 for ( i=0; i<m; i++ ) { 1715 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1716 smycols += ourlens[i]; 1717 svals += ourlens[i]; 1718 jj++; 1719 } 1720 1721 /* read in other processors and ship out */ 1722 for ( i=1; i<size; i++ ) { 1723 nz = procsnz[i]; 1724 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1725 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1726 } 1727 PetscFree(procsnz); 1728 } 1729 else { 1730 /* receive numeric values */ 1731 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1732 1733 /* receive message of values*/ 1734 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1735 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1736 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1737 1738 /* insert into matrix */ 1739 jj = rstart; 1740 smycols = mycols; 1741 svals = 
vals; 1742 for ( i=0; i<m; i++ ) { 1743 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1744 smycols += ourlens[i]; 1745 svals += ourlens[i]; 1746 jj++; 1747 } 1748 } 1749 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1750 1751 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1752 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1753 return 0; 1754 } 1755
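
/*
   Illustrative sketch only (guarded with #if 0 so it is never compiled):
   setting an entry in a row owned by another processor.  Such contributions
   are cached by MatSetValues_MPIAIJ() in the stash and routed to their
   owners by the MatAssemblyBegin/End_MPIAIJ() routines above.  The helper
   name MatStashExample_MPIAIJ, the matrix sizes, and the choice of row are
   invented for the example.
*/
#if 0
static int MatStashExample_MPIAIJ(MPI_Comm comm)
{
  Mat    C;
  int    ierr,row,col,rstart,rend,M,N;
  Scalar v = -1.0;

  ierr = MatCreateMPIAIJ(comm,5,5,PETSC_DECIDE,PETSC_DECIDE,
                         3,PETSC_NULL,2,PETSC_NULL,&C); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(C,&rstart,&rend); CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N); CHKERRQ(ierr);

  /* add to an entry in the first row below the locally owned block
     (wrapping to row 0 on the last processor); this value is stashed
     locally and communicated during assembly */
  row = (rend < M) ? rend : 0;
  col = rstart;
  ierr = MatSetValues(C,1,&row,1,&col,&v,ADD_VALUES); CHKERRQ(ierr);

  /* all processors must call the assembly routines, even those that
     contributed no off-processor values */
  ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatDestroy(C); CHKERRQ(ierr);
  return 0;
}
#endif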