#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.131 1996/03/08 05:47:18 bsmith Exp bsmith $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix. This is done in a non scable way since the 
   length of colmap equals the global matrix length. 
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  /* colmap[global column] = local off-diagonal column + 1 (0 means "not present")
     when shift == -1 (Fortran-style indexing); built from garray, the sorted list
     of global columns present in the off-diagonal block B */
  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/* Reordering is delegated to the sequential (diagonal) block; only valid
   on a single processor. */
static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

/* Insert/add an m-by-n logically dense block of values.
   Locally owned rows are routed to the diagonal block A (columns in
   [cstart,cend)) or the off-diagonal block B (all other columns);
   rows owned by other processors are stashed and communicated at
   assembly time. Mixing INSERT_VALUES and ADD_VALUES between
   assemblies is an error. */
static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C   = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned locally */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          /* diagonal block: local column numbering */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          if (mat->was_assembled) {
            /* after first assembly B uses compressed column numbering;
               translate via colmap, building it lazily */
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not present in current nonzero pattern: revert B to
                 global numbering so the new location can be inserted */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: cache in the stash until MatAssemblyBegin */
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

/* Retrieve an m-by-n block of values; only locally owned rows are
   supported (no communication is performed). */
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C   = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          /* NOTE(review): unlike MatSetValues_MPIAIJ, colmap is used here
             without a lazy-creation check; if colmap was never built this
             dereferences a null pointer -- confirm callers guarantee it */
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

/* Begin assembly: ship all stashed off-processor (row,col,value) triples
   to their owning processors with non-blocking MPI sends/receives.
   The posted requests and buffers are stored in the matrix and completed
   in MatAssemblyEnd_MPIAIJ. */
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* linear search over the row-ownership ranges */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: 
     1) each message will consist of ordered pairs 
     (global index,value) we store the global index as a double 
     to simplify the message passing. 
     2) since we don't know how long each individual message is we 
     allocate the largest needed buffer for each receive. Potentially 
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends: 
     1) starts[i] gives the starting index in svalues for stuff going to 
     the ith processor 
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples (row,col,value); starts[] is consumed as a cursor here */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[] (it was destroyed by the packing loop above) */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash the in-flight communication state for MatAssemblyEnd */
  aij->svalues    = svalues;    aij->rvalues = rvalues;
  aij->nsends     = nsends;     aij->nrecvs  = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

/* Complete assembly: drain the receives posted in MatAssemblyBegin and
   insert the incoming triples locally, wait on the sends, then assemble
   the two sequential blocks and (on first FINAL_ASSEMBLY) build the
   scatter context used by the multiply routines. */
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3; /* each entry is a (row,col,value) triple of Scalars */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      } 
      else {
        if (mat->was_assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new off-diagonal location: fall back to global numbering */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselfs, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* invalidate any cached row storage from MatGetRow */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

/* Zero all entries of both the diagonal and off-diagonal blocks,
   keeping the nonzero structure. */
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the 
   matrix is square and the column and row owerships are identical. 
   This is a BUG. The only way to fix it seems to be to access 
   aij->A and aij->B directly and not through the MatZeroRows() 
   routine. 
*/
/* Zero the (globally numbered) rows listed in is, optionally setting the
   diagonal entry to *diag.  Row indices owned by other processors are
   communicated to their owners, so every processor may name any row.
   See the BUG note above regarding diagonal entries of the B block. */
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends: 
     1) starts[i] gives the starting index in svalues for stuff going to 
     the ith processor 
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack row indices by destination; starts[] is consumed as a cursor */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] destroyed by the packing loop */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base; /* convert to local row numbering */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

/* y = A*x: overlap the scatter of off-processor x entries into lvec with
   the multiply by the diagonal block, then add the off-diagonal product. */
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

/* z = y + A*x, with the same communication overlap as MatMult. */
static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

/* y = A'*x: compute the off-diagonal transpose product into lvec, start a
   reverse scatter-add back to the owners, overlap with the local transpose
   product, then complete the scatter. */
static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/* z = y + A'*x, with the same reverse-scatter overlap as MatMultTrans. */
static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  return MatGetDiagonal(a->A,v);
}

/* Scale the whole matrix by *aa (both blocks). */
static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

/* Destroy the parallel matrix: release both sequential blocks and all
   auxiliary structures (colmap, garray, scatter context, cached rows). */
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

/* Binary output: only supported when running on one processor. */
static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

/* ASCII / graphical / Matlab output.  For the general parallel case the
   entire matrix is gathered onto processor 0 and viewed there. */
static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ*  C = (Mat_SeqAIJ*)aij->A->data;
  int          ierr, format,shift = C->indexshift,rank;
  FILE         *fd;
  ViewerType   vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      /* per-processor nonzero/memory statistics, printed in rank order */
      int nz, nzalloc, mem, flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      MPIU_Seq_begin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
            rank,aij->m,nz,nzalloc,mem);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
            rank,aij->m,nz,nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* per-processor view of the two local blocks, in rank order */
    ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part: note aj is temporarily shifted to global
         column numbering in place, then restored below */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part: translate compressed columns back to global
         numbering via garray into a scratch array */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

/* Top-level view dispatcher: route to the ASCII/draw/Matlab or binary
   viewer implementation based on the viewer type. */
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF;
  }
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
   This has to provide several versions.

    1) per sequential 
    2) a) use only local smoothing updating outer values only once. 
       b) local smoothing updating outer values each inner iteration 
    3) color updating out values betwen colors. 
*/
/* SOR/SSOR relaxation for the parallel AIJ matrix.
   The true (global) sweeps pipeline communication of boundary values
   with VecPipelineBegin/End; the SOR_LOCAL_* variants use only the local
   blocks with a one-time scatter of off-processor values into lvec.
   `shift` is the index shift of the underlying storage (0 or -1 for
   Fortran-style indexing); `xs`/`ls`/`ts` are the correspondingly shifted
   base pointers used inside SPARSEDENSEMDOT. */
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D; where L is lower trianglar, 
       U is upper triangular, E is diagonal; This routine applies 

            (L + E)^{-1} A (U + E)^{-1} 

       to a vector efficiently using Eisenstat's trick. This is for 
       the case of SSOR preconditioner, so E is D/omega where omega 
       is the relaxation factor. 
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      /* strictly upper-triangular part of row i of A */
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      /* whole row i of the off-diagonal block B */
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j*/
    diag = A->diag; /* restore: the loop above advanced the diag pointer */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      /* strictly lower-triangular part of row i of A */
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* bring the off-processor x values up to date before the sweeps */
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with zero guess only needs the lower triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with zero guess only needs the upper triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-processor values contribute nothing,
         so delegate entirely to the sequential block */
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

/* Gather nonzero/allocation/memory statistics for both local blocks and,
   for the global flavors, reduce them over the communicator. */
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    if (nz)      *nz      = isend[0];
    if (nzalloc) *nzalloc = isend[1];
    if (mem)     *mem     = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

/* Propagate options to both local blocks where meaningful.
   NOTE(review): SOURCE is truncated inside this function; the fragment
   below is reproduced verbatim and is completed elsewhere in the file. */
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
        MatSetOption(a->A,op);
        MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op ==
STRUCTURALLY_SYMMETRIC_MATRIX || 1062 op == YES_NEW_DIAGONALS) 1063 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1064 else if (op == COLUMN_ORIENTED) { 1065 a->roworiented = 0; 1066 MatSetOption(a->A,op); 1067 MatSetOption(a->B,op); 1068 } 1069 else if (op == NO_NEW_DIAGONALS) 1070 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1071 else 1072 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1073 return 0; 1074 } 1075 1076 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1077 { 1078 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1079 *m = mat->M; *n = mat->N; 1080 return 0; 1081 } 1082 1083 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1084 { 1085 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1086 *m = mat->m; *n = mat->N; 1087 return 0; 1088 } 1089 1090 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1091 { 1092 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1093 *m = mat->rstart; *n = mat->rend; 1094 return 0; 1095 } 1096 1097 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1098 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1099 1100 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1101 { 1102 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1103 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1104 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1105 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1106 int *cmap, *idx_p; 1107 1108 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1109 mat->getrowactive = PETSC_TRUE; 1110 1111 if (!mat->rowvalues && (idx || v)) { 1112 /* 1113 allocate enough space to hold information from the longest row. 
1114 */ 1115 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1116 int max = 1,n = mat->n,tmp; 1117 for ( i=0; i<n; i++ ) { 1118 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1119 if (max < tmp) { max = tmp; } 1120 } 1121 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1122 CHKPTRQ(mat->rowvalues); 1123 mat->rowindices = (int *) (mat->rowvalues + max); 1124 } 1125 1126 1127 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1128 lrow = row - rstart; 1129 1130 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1131 if (!v) {pvA = 0; pvB = 0;} 1132 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1133 ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1134 ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1135 nztot = nzA + nzB; 1136 1137 cmap = mat->garray; 1138 if (v || idx) { 1139 if (nztot) { 1140 /* Sort by increasing column numbers, assuming A and B already sorted */ 1141 int imark = -1; 1142 if (v) { 1143 *v = v_p = mat->rowvalues; 1144 for ( i=0; i<nzB; i++ ) { 1145 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1146 else break; 1147 } 1148 imark = i; 1149 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1150 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1151 } 1152 if (idx) { 1153 *idx = idx_p = mat->rowindices; 1154 if (imark > -1) { 1155 for ( i=0; i<imark; i++ ) { 1156 idx_p[i] = cmap[cworkB[i]]; 1157 } 1158 } else { 1159 for ( i=0; i<nzB; i++ ) { 1160 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1161 else break; 1162 } 1163 imark = i; 1164 } 1165 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1166 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1167 } 1168 } 1169 else {*idx = 0; *v=0;} 1170 } 1171 *nz = nztot; 1172 ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1173 ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1174 return 0; 1175 } 1176 
1177 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1178 { 1179 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1180 if (aij->getrowactive == PETSC_FALSE) { 1181 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1182 } 1183 aij->getrowactive = PETSC_FALSE; 1184 return 0; 1185 } 1186 1187 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1190 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1191 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1192 double sum = 0.0; 1193 Scalar *v; 1194 1195 if (aij->size == 1) { 1196 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1197 } else { 1198 if (type == NORM_FROBENIUS) { 1199 v = amat->a; 1200 for (i=0; i<amat->nz; i++ ) { 1201 #if defined(PETSC_COMPLEX) 1202 sum += real(conj(*v)*(*v)); v++; 1203 #else 1204 sum += (*v)*(*v); v++; 1205 #endif 1206 } 1207 v = bmat->a; 1208 for (i=0; i<bmat->nz; i++ ) { 1209 #if defined(PETSC_COMPLEX) 1210 sum += real(conj(*v)*(*v)); v++; 1211 #else 1212 sum += (*v)*(*v); v++; 1213 #endif 1214 } 1215 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1216 *norm = sqrt(*norm); 1217 } 1218 else if (type == NORM_1) { /* max column norm */ 1219 double *tmp, *tmp2; 1220 int *jj, *garray = aij->garray; 1221 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1222 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1223 PetscMemzero(tmp,aij->N*sizeof(double)); 1224 *norm = 0.0; 1225 v = amat->a; jj = amat->j; 1226 for ( j=0; j<amat->nz; j++ ) { 1227 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1228 } 1229 v = bmat->a; jj = bmat->j; 1230 for ( j=0; j<bmat->nz; j++ ) { 1231 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1232 } 1233 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1234 for ( j=0; j<aij->N; j++ ) { 1235 if (tmp2[j] > *norm) *norm = tmp2[j]; 1236 } 1237 PetscFree(tmp); 
PetscFree(tmp2); 1238 } 1239 else if (type == NORM_INFINITY) { /* max row norm */ 1240 double ntemp = 0.0; 1241 for ( j=0; j<amat->m; j++ ) { 1242 v = amat->a + amat->i[j] + shift; 1243 sum = 0.0; 1244 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1245 sum += PetscAbsScalar(*v); v++; 1246 } 1247 v = bmat->a + bmat->i[j] + shift; 1248 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1249 sum += PetscAbsScalar(*v); v++; 1250 } 1251 if (sum > ntemp) ntemp = sum; 1252 } 1253 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1254 } 1255 else { 1256 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1257 } 1258 } 1259 return 0; 1260 } 1261 1262 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1263 { 1264 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1265 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1266 int ierr,shift = Aloc->indexshift; 1267 Mat B; 1268 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1269 Scalar *array; 1270 1271 if (matout == PETSC_NULL && M != N) 1272 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1273 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1274 PETSC_NULL,&B); CHKERRQ(ierr); 1275 1276 /* copy over the A part */ 1277 Aloc = (Mat_SeqAIJ*) a->A->data; 1278 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1279 row = a->rstart; 1280 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1281 for ( i=0; i<m; i++ ) { 1282 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1283 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1284 } 1285 aj = Aloc->j; 1286 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1287 1288 /* copy over the B part */ 1289 Aloc = (Mat_SeqAIJ*) a->B->data; 1290 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1291 row = a->rstart; 1292 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1293 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1294 for ( i=0; 
i<m; i++ ) { 1295 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1296 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1297 } 1298 PetscFree(ct); 1299 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1300 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1301 if (matout != PETSC_NULL) { 1302 *matout = B; 1303 } else { 1304 /* This isn't really an in-place transpose .... but free data structures from a */ 1305 PetscFree(a->rowners); 1306 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1307 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1308 if (a->colmap) PetscFree(a->colmap); 1309 if (a->garray) PetscFree(a->garray); 1310 if (a->lvec) VecDestroy(a->lvec); 1311 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1312 PetscFree(a); 1313 PetscMemcpy(A,B,sizeof(struct _Mat)); 1314 PetscHeaderDestroy(B); 1315 } 1316 return 0; 1317 } 1318 1319 extern int MatPrintHelp_SeqAIJ(Mat); 1320 static int MatPrintHelp_MPIAIJ(Mat A) 1321 { 1322 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1323 1324 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1325 else return 0; 1326 } 1327 1328 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1329 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1330 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1331 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1332 /* -------------------------------------------------------------------*/ 1333 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1334 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1335 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1336 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1337 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1338 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1339 MatLUFactor_MPIAIJ,0, 1340 MatRelax_MPIAIJ, 1341 MatTranspose_MPIAIJ, 1342 MatGetInfo_MPIAIJ,0, 1343 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1344 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1345 0, 1346 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 
1347 MatGetReordering_MPIAIJ, 1348 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1349 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1350 MatILUFactorSymbolic_MPIAIJ,0, 1351 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1352 0,0,0, 1353 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1354 MatPrintHelp_MPIAIJ, 1355 MatScale_MPIAIJ}; 1356 1357 /*@C 1358 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1359 (the default parallel PETSc format). For good matrix assembly performance 1360 the user should preallocate the matrix storage by setting the parameters 1361 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1362 performance can be increased by more than a factor of 50. 1363 1364 Input Parameters: 1365 . comm - MPI communicator 1366 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1367 . n - number of local columns (or PETSC_DECIDE to have calculated 1368 if N is given) 1369 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1370 . N - number of global columns (or PETSC_DECIDE to have calculated 1371 if n is given) 1372 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1373 (same for all local rows) 1374 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1375 or null (possibly different for each row). You must leave room 1376 for the diagonal entry even if it is zero. 1377 . o_nz - number of nonzeros per row in off-diagonal portion of local 1378 submatrix (same for all local rows). 1379 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1380 submatrix or null (possibly different for each row). 1381 1382 Output Parameter: 1383 . newmat - the matrix 1384 1385 Notes: 1386 The AIJ format (also called the Yale sparse matrix format or 1387 compressed row storage), is fully compatible with standard Fortran 77 1388 storage. 
That is, the stored row and column indices can begin at 1389 either one (as in Fortran) or zero. See the users manual for details. 1390 1391 The user MUST specify either the local or global matrix dimensions 1392 (possibly both). 1393 1394 By default, this format uses inodes (identical nodes) when possible. 1395 We search for consecutive rows with the same nonzero structure, thereby 1396 reusing matrix information to achieve increased efficiency. 1397 1398 Options Database Keys: 1399 $ -mat_aij_no_inode - Do not use inodes 1400 $ -mat_aij_inode_limit <limit> - Set inode limit. 1401 $ (max limit=5) 1402 $ -mat_aij_oneindex - Internally use indexing starting at 1 1403 $ rather than 0. Note: When calling MatSetValues(), 1404 $ the user still MUST index entries starting at 0! 1405 1406 Storage Information: 1407 For a square global matrix we define each processor's diagonal portion 1408 to be its local rows and the corresponding columns (a square submatrix); 1409 each processor's off-diagonal portion encompasses the remainder of the 1410 local matrix (a rectangular submatrix). 1411 1412 The user can specify preallocated storage for the diagonal part of 1413 the local submatrix with either d_nz or d_nnz (not both). Set 1414 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1415 memory allocation. Likewise, specify preallocated storage for the 1416 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1417 1418 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1419 the figure below we depict these three local rows and all columns (0-11). 
1420 1421 $ 0 1 2 3 4 5 6 7 8 9 10 11 1422 $ ------------------- 1423 $ row 3 | o o o d d d o o o o o o 1424 $ row 4 | o o o d d d o o o o o o 1425 $ row 5 | o o o d d d o o o o o o 1426 $ ------------------- 1427 $ 1428 1429 Thus, any entries in the d locations are stored in the d (diagonal) 1430 submatrix, and any entries in the o locations are stored in the 1431 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1432 stored simply in the MATSEQAIJ format for compressed row storage. 1433 1434 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1435 and o_nz should indicate the number of nonzeros per row in the o matrix. 1436 In general, for PDE problems in which most nonzeros are near the diagonal, 1437 one expects d_nz >> o_nz. For additional details, see the users manual 1438 chapter on matrices and the file $(PETSC_DIR)/Performance. 1439 1440 .keywords: matrix, aij, compressed row, sparse, parallel 1441 1442 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1443 @*/ 1444 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1445 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1446 { 1447 Mat mat; 1448 Mat_MPIAIJ *a; 1449 int ierr, i,sum[2],work[2]; 1450 1451 *newmat = 0; 1452 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1453 PLogObjectCreate(mat); 1454 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1455 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1456 mat->destroy = MatDestroy_MPIAIJ; 1457 mat->view = MatView_MPIAIJ; 1458 mat->factor = 0; 1459 mat->assembled = PETSC_FALSE; 1460 1461 a->insertmode = NOT_SET_VALUES; 1462 MPI_Comm_rank(comm,&a->rank); 1463 MPI_Comm_size(comm,&a->size); 1464 1465 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1466 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1467 1468 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1469 work[0] = m; work[1] = n; 1470 MPI_Allreduce( work, 
sum,2,MPI_INT,MPI_SUM,comm ); 1471 if (M == PETSC_DECIDE) M = sum[0]; 1472 if (N == PETSC_DECIDE) N = sum[1]; 1473 } 1474 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1475 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1476 a->m = m; 1477 a->n = n; 1478 a->N = N; 1479 a->M = M; 1480 1481 /* build local table of row and column ownerships */ 1482 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1483 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1484 a->cowners = a->rowners + a->size + 1; 1485 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1486 a->rowners[0] = 0; 1487 for ( i=2; i<=a->size; i++ ) { 1488 a->rowners[i] += a->rowners[i-1]; 1489 } 1490 a->rstart = a->rowners[a->rank]; 1491 a->rend = a->rowners[a->rank+1]; 1492 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1493 a->cowners[0] = 0; 1494 for ( i=2; i<=a->size; i++ ) { 1495 a->cowners[i] += a->cowners[i-1]; 1496 } 1497 a->cstart = a->cowners[a->rank]; 1498 a->cend = a->cowners[a->rank+1]; 1499 1500 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1501 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1502 PLogObjectParent(mat,a->A); 1503 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1504 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1505 PLogObjectParent(mat,a->B); 1506 1507 /* build cache for off array entries formed */ 1508 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1509 a->colmap = 0; 1510 a->garray = 0; 1511 a->roworiented = 1; 1512 1513 /* stuff used for matrix vector multiply */ 1514 a->lvec = 0; 1515 a->Mvctx = 0; 1516 1517 /* stuff for MatGetRow() */ 1518 a->rowindices = 0; 1519 a->rowvalues = 0; 1520 a->getrowactive = PETSC_FALSE; 1521 1522 *newmat = mat; 1523 return 0; 1524 } 1525 1526 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1527 { 1528 Mat mat; 1529 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) 
matin->data; 1530 int ierr, len,flg; 1531 1532 *newmat = 0; 1533 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1534 PLogObjectCreate(mat); 1535 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1536 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1537 mat->destroy = MatDestroy_MPIAIJ; 1538 mat->view = MatView_MPIAIJ; 1539 mat->factor = matin->factor; 1540 mat->assembled = PETSC_TRUE; 1541 1542 a->m = oldmat->m; 1543 a->n = oldmat->n; 1544 a->M = oldmat->M; 1545 a->N = oldmat->N; 1546 1547 a->rstart = oldmat->rstart; 1548 a->rend = oldmat->rend; 1549 a->cstart = oldmat->cstart; 1550 a->cend = oldmat->cend; 1551 a->size = oldmat->size; 1552 a->rank = oldmat->rank; 1553 a->insertmode = NOT_SET_VALUES; 1554 a->rowvalues = 0; 1555 a->getrowactive = PETSC_FALSE; 1556 1557 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1558 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1559 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1560 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1561 if (oldmat->colmap) { 1562 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1563 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1564 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1565 } else a->colmap = 0; 1566 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1567 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1568 PLogObjectMemory(mat,len*sizeof(int)); 1569 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1570 } else a->garray = 0; 1571 1572 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1573 PLogObjectParent(mat,a->lvec); 1574 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1575 PLogObjectParent(mat,a->Mvctx); 1576 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1577 PLogObjectParent(mat,a->A); 1578 ierr = MatConvert(oldmat->B,MATSAME,&a->B); 
CHKERRQ(ierr); 1579 PLogObjectParent(mat,a->B); 1580 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1581 if (flg) { 1582 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1583 } 1584 *newmat = mat; 1585 return 0; 1586 } 1587 1588 #include "sysio.h" 1589 1590 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1591 { 1592 Mat A; 1593 int i, nz, ierr, j,rstart, rend, fd; 1594 Scalar *vals,*svals; 1595 MPI_Comm comm = ((PetscObject)viewer)->comm; 1596 MPI_Status status; 1597 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1598 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1599 int tag = ((PetscObject)viewer)->tag; 1600 1601 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1602 if (!rank) { 1603 ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr); 1604 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1605 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1606 } 1607 1608 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1609 M = header[1]; N = header[2]; 1610 /* determine ownership of all rows */ 1611 m = M/size + ((M % size) > rank); 1612 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1613 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1614 rowners[0] = 0; 1615 for ( i=2; i<=size; i++ ) { 1616 rowners[i] += rowners[i-1]; 1617 } 1618 rstart = rowners[rank]; 1619 rend = rowners[rank+1]; 1620 1621 /* distribute row lengths to all processors */ 1622 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1623 offlens = ourlens + (rend-rstart); 1624 if (!rank) { 1625 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1626 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1627 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1628 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1629 
MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1630 PetscFree(sndcounts); 1631 } 1632 else { 1633 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1634 } 1635 1636 if (!rank) { 1637 /* calculate the number of nonzeros on each processor */ 1638 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1639 PetscMemzero(procsnz,size*sizeof(int)); 1640 for ( i=0; i<size; i++ ) { 1641 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1642 procsnz[i] += rowlengths[j]; 1643 } 1644 } 1645 PetscFree(rowlengths); 1646 1647 /* determine max buffer needed and allocate it */ 1648 maxnz = 0; 1649 for ( i=0; i<size; i++ ) { 1650 maxnz = PetscMax(maxnz,procsnz[i]); 1651 } 1652 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1653 1654 /* read in my part of the matrix column indices */ 1655 nz = procsnz[0]; 1656 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1657 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1658 1659 /* read in every one elses and ship off */ 1660 for ( i=1; i<size; i++ ) { 1661 nz = procsnz[i]; 1662 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1663 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1664 } 1665 PetscFree(cols); 1666 } 1667 else { 1668 /* determine buffer space needed for message */ 1669 nz = 0; 1670 for ( i=0; i<m; i++ ) { 1671 nz += ourlens[i]; 1672 } 1673 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1674 1675 /* receive message of column indices*/ 1676 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1677 MPI_Get_count(&status,MPI_INT,&maxnz); 1678 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1679 } 1680 1681 /* loop over local rows, determining number of off diagonal entries */ 1682 PetscMemzero(offlens,m*sizeof(int)); 1683 jj = 0; 1684 for ( i=0; i<m; i++ ) { 1685 for ( j=0; j<ourlens[i]; j++ ) { 1686 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1687 jj++; 1688 } 1689 } 1690 1691 /* create our 
matrix */ 1692 for ( i=0; i<m; i++ ) { 1693 ourlens[i] -= offlens[i]; 1694 } 1695 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1696 A = *newmat; 1697 MatSetOption(A,COLUMNS_SORTED); 1698 for ( i=0; i<m; i++ ) { 1699 ourlens[i] += offlens[i]; 1700 } 1701 1702 if (!rank) { 1703 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1704 1705 /* read in my part of the matrix numerical values */ 1706 nz = procsnz[0]; 1707 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1708 1709 /* insert into matrix */ 1710 jj = rstart; 1711 smycols = mycols; 1712 svals = vals; 1713 for ( i=0; i<m; i++ ) { 1714 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1715 smycols += ourlens[i]; 1716 svals += ourlens[i]; 1717 jj++; 1718 } 1719 1720 /* read in other processors and ship out */ 1721 for ( i=1; i<size; i++ ) { 1722 nz = procsnz[i]; 1723 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1724 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1725 } 1726 PetscFree(procsnz); 1727 } 1728 else { 1729 /* receive numeric values */ 1730 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1731 1732 /* receive message of values*/ 1733 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1734 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1735 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1736 1737 /* insert into matrix */ 1738 jj = rstart; 1739 smycols = mycols; 1740 svals = vals; 1741 for ( i=0; i<m; i++ ) { 1742 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1743 smycols += ourlens[i]; 1744 svals += ourlens[i]; 1745 jj++; 1746 } 1747 } 1748 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1749 1750 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1751 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1752 return 0; 1753 } 1754