1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.139 1996/04/09 18:54:19 curfman Exp curfman $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (mat->was_assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 293 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 294 295 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 296 return 0; 297 } 298 299 static int MatZeroEntries_MPIAIJ(Mat A) 300 { 301 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 302 int ierr; 303 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 304 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 305 return 0; 306 } 307 308 /* the code does not do the diagonal entries correctly unless the 309 matrix is square and the column and row owerships are identical. 310 This is a BUG. The only way to fix it seems to be to access 311 aij->A and aij->B directly and not through the MatZeroRows() 312 routine. 313 */ 314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 315 { 316 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 317 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 318 int *procs,*nprocs,j,found,idx,nsends,*work; 319 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 320 int *rvalues,tag = A->tag,count,base,slen,n,*source; 321 int *lens,imdex,*lrows,*values; 322 MPI_Comm comm = A->comm; 323 MPI_Request *send_waits,*recv_waits; 324 MPI_Status recv_status,*send_status; 325 IS istmp; 326 327 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 328 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 329 330 /* first count number of contributors to each processor */ 331 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 332 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 333 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 334 for ( i=0; i<N; i++ ) { 335 idx = rows[i]; 336 found = 0; 337 for ( j=0; j<size; j++ ) { 338 if (idx >= owners[j] && idx < owners[j+1]) { 339 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 340 } 341 } 342 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 343 } 344 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 345 346 /* inform other processors of number of messages and max length*/ 347 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 348 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 349 nrecvs = work[rank]; 350 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 351 nmax = work[rank]; 352 PetscFree(work); 353 354 /* post receives: */ 355 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 356 CHKPTRQ(rvalues); 357 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 358 CHKPTRQ(recv_waits); 359 for ( i=0; i<nrecvs; i++ ) { 360 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 361 } 362 363 /* do sends: 364 1) starts[i] gives the starting index in svalues for stuff going to 365 the ith processor 366 */ 367 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 368 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 369 CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<N; i++ ) { 374 svalues[starts[owner[i]]++] = rows[i]; 375 } 376 ISRestoreIndices(is,&rows); 377 378 starts[0] = 0; 379 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 380 count = 0; 381 for ( i=0; i<size; i++ ) { 382 if (procs[i]) { 383 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 384 } 385 } 386 PetscFree(starts); 387 388 base = owners[rank]; 389 390 /* wait on receives */ 391 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 392 source = lens + nrecvs; 393 count = nrecvs; slen = 0; 394 while (count) { 395 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 396 /* unpack receives into our local space */ 397 MPI_Get_count(&recv_status,MPI_INT,&n); 398 source[imdex] = recv_status.MPI_SOURCE; 399 lens[imdex] = n; 400 slen += n; 401 count--; 402 } 403 PetscFree(recv_waits); 404 405 /* move the data into the send scatter */ 406 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 407 count = 0; 408 for ( i=0; i<nrecvs; i++ ) { 409 values = rvalues + i*nmax; 410 for ( j=0; j<lens[i]; j++ ) { 411 lrows[count++] = values[j] - base; 412 } 413 } 414 PetscFree(rvalues); PetscFree(lens); 415 PetscFree(owner); PetscFree(nprocs); 416 417 /* actually zap the local rows */ 418 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 419 PLogObjectParent(A,istmp); 420 PetscFree(lrows); 421 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 422 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 423 ierr = ISDestroy(istmp); CHKERRQ(ierr); 424 425 /* wait on sends */ 426 if (nsends) { 427 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 428 CHKPTRQ(send_status); 429 MPI_Waitall(nsends,send_waits,send_status); 430 PetscFree(send_status); 431 } 432 PetscFree(send_waits); PetscFree(svalues); 433 434 return 0; 435 } 436 437 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 438 { 439 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 440 int ierr; 441 442 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 443 ierr = MatMult_SeqAIJ(a->A,xx,yy); CHKERRQ(ierr); 444 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 445 ierr = MatMultAdd_SeqAIJ(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 446 return 0; 447 } 448 449 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 450 { 451 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 452 int ierr; 453 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454 ierr = MatMultAdd_SeqAIJ(a->A,xx,yy,zz); CHKERRQ(ierr); 455 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 456 ierr = MatMultAdd_SeqAIJ(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 457 return 0; 458 } 459 460 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 461 { 462 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 463 int ierr; 464 465 /* do nondiagonal part */ 466 ierr = MatMultTrans_SeqAIJ(a->B,xx,a->lvec); CHKERRQ(ierr); 467 /* send it on its way */ 468 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 469 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 470 /* do local part */ 471 ierr = MatMultTrans_SeqAIJ(a->A,xx,yy); CHKERRQ(ierr); 472 /* receive remote parts: note this assumes the values are not actually */ 473 /* inserted in yy until the next line, which is true for my implementation*/ 474 /* but is not perhaps always true. */ 475 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 476 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 477 return 0; 478 } 479 480 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 483 int ierr; 484 485 /* do nondiagonal part */ 486 ierr = MatMultTrans_SeqAIJ(a->B,xx,a->lvec); CHKERRQ(ierr); 487 /* send it on its way */ 488 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 489 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490 /* do local part */ 491 ierr = MatMultTransAdd_SeqAIJ(a->A,xx,yy,zz); CHKERRQ(ierr); 492 /* receive remote parts: note this assumes the values are not actually */ 493 /* inserted in yy until the next line, which is true for my implementation*/ 494 /* but is not perhaps always true. */ 495 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 496 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 497 return 0; 498 } 499 500 /* 501 This only works correctly for square matrices where the subblock A->A is the 502 diagonal block 503 */ 504 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 505 { 506 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 507 if (a->M != a->N) 508 SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 509 return MatGetDiagonal(a->A,v); 510 } 511 512 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 513 { 514 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 515 int ierr; 516 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 517 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 518 return 0; 519 } 520 521 static int MatDestroy_MPIAIJ(PetscObject obj) 522 { 523 Mat mat = (Mat) obj; 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 525 int ierr; 526 #if defined(PETSC_LOG) 527 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 528 #endif 529 PetscFree(aij->rowners); 530 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 531 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 532 if (aij->colmap) PetscFree(aij->colmap); 533 if (aij->garray) PetscFree(aij->garray); 534 if (aij->lvec) VecDestroy(aij->lvec); 535 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 536 if (aij->rowvalues) PetscFree(aij->rowvalues); 537 PetscFree(aij); 538 PLogObjectDestroy(mat); 539 PetscHeaderDestroy(mat); 540 return 0; 541 } 542 #include "draw.h" 543 #include "pinclude/pviewer.h" 544 545 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 546 { 547 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 548 int ierr; 549 550 if (aij->size == 1) { 551 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 552 } 553 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 554 return 0; 555 } 556 557 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 560 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 561 int ierr, format,shift = C->indexshift,rank; 562 FILE *fd; 563 ViewerType vtype; 564 565 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 566 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 567 ierr = ViewerGetFormat(viewer,&format); 568 if (format == ASCII_FORMAT_INFO_DETAILED) { 569 int nz, nzalloc, mem, flg; 570 MPI_Comm_rank(mat->comm,&rank); 571 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 572 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 573 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 574 PetscSequentialPhaseBegin(mat->comm,1); 575 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 576 rank,aij->m,nz,nzalloc,mem); 577 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 578 rank,aij->m,nz,nzalloc,mem); 579 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 580 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 581 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 582 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 583 fflush(fd); 584 PetscSequentialPhaseEnd(mat->comm,1); 585 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 586 return 0; 587 } 588 else if (format == ASCII_FORMAT_INFO) { 589 return 0; 590 } 591 } 592 593 if (vtype == DRAW_VIEWER) { 594 Draw draw; 595 PetscTruth isnull; 596 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 597 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 598 } 599 600 if (vtype == ASCII_FILE_VIEWER) { 601 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 602 PetscSequentialPhaseBegin(mat->comm,1); 603 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 604 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 605 aij->cend); 606 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 607 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 608 fflush(fd); 609 PetscSequentialPhaseEnd(mat->comm,1); 610 } 611 else { 612 int size = aij->size; 613 rank = aij->rank; 614 if (size == 1) { 615 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 616 } 617 else { 618 /* assemble the entire matrix onto first processor. */ 619 Mat A; 620 Mat_SeqAIJ *Aloc; 621 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 622 Scalar *a; 623 624 if (!rank) { 625 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 626 CHKERRQ(ierr); 627 } 628 else { 629 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 630 CHKERRQ(ierr); 631 } 632 PLogObjectParent(mat,A); 633 634 /* copy over the A part */ 635 Aloc = (Mat_SeqAIJ*) aij->A->data; 636 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 637 row = aij->rstart; 638 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 639 for ( i=0; i<m; i++ ) { 640 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 641 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 642 } 643 aj = Aloc->j; 644 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 645 646 /* copy over the B part */ 647 Aloc = (Mat_SeqAIJ*) aij->B->data; 648 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 649 row = aij->rstart; 650 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 651 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 652 for ( i=0; i<m; i++ ) { 653 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 654 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 655 } 656 PetscFree(ct); 657 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 658 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 659 if (!rank) { 660 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 661 } 662 ierr = MatDestroy(A); CHKERRQ(ierr); 663 } 664 } 665 return 0; 666 } 667 668 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 669 { 670 Mat mat = (Mat) obj; 671 int ierr; 672 ViewerType vtype; 673 674 if (!viewer) { 675 viewer = STDOUT_VIEWER_SELF; 676 } 677 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 678 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 679 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 680 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 681 } 682 else if (vtype == BINARY_FILE_VIEWER) { 683 return MatView_MPIAIJ_Binary(mat,viewer); 684 } 685 return 0; 686 } 687 688 /* 689 This has to provide several versions. 690 691 1) per sequential 692 2) a) use only local smoothing updating outer values only once. 693 b) local smoothing updating outer values each inner iteration 694 3) color updating out values betwen colors. 695 */ 696 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 697 double fshift,int its,Vec xx) 698 { 699 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 700 Mat AA = mat->A, BB = mat->B; 701 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 702 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 703 int ierr,*idx, *diag; 704 int n = mat->n, m = mat->m, i,shift = A->indexshift; 705 Vec tt; 706 707 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 708 xs = x + shift; /* shift by one for index start of 1 */ 709 ls = ls + shift; 710 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 711 diag = A->diag; 712 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 713 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 714 } 715 if (flag & SOR_EISENSTAT) { 716 /* Let A = L + U + D; where L is lower trianglar, 717 U is upper triangular, E is diagonal; This routine applies 718 719 (L + E)^{-1} A (U + E)^{-1} 720 721 to a vector efficiently using Eisenstat's trick. This is for 722 the case of SSOR preconditioner, so E is D/omega where omega 723 is the relaxation factor. 724 */ 725 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 726 PLogObjectParent(matin,tt); 727 VecGetArray(tt,&t); 728 scale = (2.0/omega) - 1.0; 729 /* x = (E + U)^{-1} b */ 730 VecSet(&zero,mat->lvec); 731 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 732 mat->Mvctx); CHKERRQ(ierr); 733 for ( i=m-1; i>-1; i-- ) { 734 n = A->i[i+1] - diag[i] - 1; 735 idx = A->j + diag[i] + !shift; 736 v = A->a + diag[i] + !shift; 737 sum = b[i]; 738 SPARSEDENSEMDOT(sum,xs,v,idx,n); 739 d = fshift + A->a[diag[i]+shift]; 740 n = B->i[i+1] - B->i[i]; 741 idx = B->j + B->i[i] + shift; 742 v = B->a + B->i[i] + shift; 743 SPARSEDENSEMDOT(sum,ls,v,idx,n); 744 x[i] = omega*(sum/d); 745 } 746 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 747 mat->Mvctx); CHKERRQ(ierr); 748 749 /* t = b - (2*E - D)x */ 750 v = A->a; 751 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 752 753 /* t = (E + L)^{-1}t */ 754 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 755 diag = A->diag; 756 VecSet(&zero,mat->lvec); 757 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 758 mat->Mvctx); CHKERRQ(ierr); 759 for ( i=0; i<m; i++ ) { 760 n = diag[i] - A->i[i]; 761 idx = A->j + A->i[i] + shift; 762 v = A->a + A->i[i] + shift; 763 sum = t[i]; 764 SPARSEDENSEMDOT(sum,ts,v,idx,n); 765 d = fshift + A->a[diag[i]+shift]; 766 n = B->i[i+1] - B->i[i]; 767 idx = B->j + B->i[i] + shift; 768 v = B->a + B->i[i] + shift; 769 SPARSEDENSEMDOT(sum,ls,v,idx,n); 770 t[i] = omega*(sum/d); 771 } 772 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 773 mat->Mvctx); CHKERRQ(ierr); 774 /* x = x + t */ 775 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 776 VecDestroy(tt); 777 return 0; 778 } 779 780 781 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 782 if (flag & SOR_ZERO_INITIAL_GUESS) { 783 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 784 } 785 else { 786 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 787 CHKERRQ(ierr); 788 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 789 CHKERRQ(ierr); 790 } 791 while (its--) { 792 /* go down through the rows */ 793 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 794 mat->Mvctx); CHKERRQ(ierr); 795 for ( i=0; i<m; i++ ) { 796 n = A->i[i+1] - A->i[i]; 797 idx = A->j + A->i[i] + shift; 798 v = A->a + A->i[i] + shift; 799 sum = b[i]; 800 SPARSEDENSEMDOT(sum,xs,v,idx,n); 801 d = fshift + A->a[diag[i]+shift]; 802 n = B->i[i+1] - B->i[i]; 803 idx = B->j + B->i[i] + shift; 804 v = B->a + B->i[i] + shift; 805 SPARSEDENSEMDOT(sum,ls,v,idx,n); 806 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 807 } 808 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 809 mat->Mvctx); CHKERRQ(ierr); 810 /* come up through the rows */ 811 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 812 mat->Mvctx); CHKERRQ(ierr); 813 for ( i=m-1; i>-1; i-- ) { 814 n = A->i[i+1] - A->i[i]; 815 idx = A->j + A->i[i] + shift; 816 v = A->a + A->i[i] + shift; 817 sum = b[i]; 818 SPARSEDENSEMDOT(sum,xs,v,idx,n); 819 d = fshift + A->a[diag[i]+shift]; 820 n = B->i[i+1] - B->i[i]; 821 idx = B->j + B->i[i] + shift; 822 v = B->a + B->i[i] + shift; 823 SPARSEDENSEMDOT(sum,ls,v,idx,n); 824 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 825 } 826 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 827 mat->Mvctx); CHKERRQ(ierr); 828 } 829 } 830 else if (flag & SOR_FORWARD_SWEEP){ 831 if (flag & SOR_ZERO_INITIAL_GUESS) { 832 VecSet(&zero,mat->lvec); 833 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 834 mat->Mvctx); CHKERRQ(ierr); 835 for ( i=0; i<m; i++ ) { 836 n = diag[i] - A->i[i]; 837 idx = A->j + A->i[i] + shift; 838 v = A->a + A->i[i] + shift; 839 sum = b[i]; 840 SPARSEDENSEMDOT(sum,xs,v,idx,n); 841 d = fshift + A->a[diag[i]+shift]; 842 n = B->i[i+1] - B->i[i]; 843 idx = B->j + B->i[i] + shift; 844 v = B->a + B->i[i] + shift; 845 SPARSEDENSEMDOT(sum,ls,v,idx,n); 846 x[i] = omega*(sum/d); 847 } 848 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 849 mat->Mvctx); CHKERRQ(ierr); 850 its--; 851 } 852 while (its--) { 853 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 854 CHKERRQ(ierr); 855 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 856 CHKERRQ(ierr); 857 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 858 mat->Mvctx); CHKERRQ(ierr); 859 for ( i=0; i<m; i++ ) { 860 n = A->i[i+1] - A->i[i]; 861 idx = A->j + A->i[i] + shift; 862 v = A->a + A->i[i] + shift; 863 sum = b[i]; 864 SPARSEDENSEMDOT(sum,xs,v,idx,n); 865 d = fshift + A->a[diag[i]+shift]; 866 n = B->i[i+1] - B->i[i]; 867 idx = B->j + B->i[i] + shift; 868 v = B->a + B->i[i] + shift; 869 SPARSEDENSEMDOT(sum,ls,v,idx,n); 870 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 871 } 872 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 873 mat->Mvctx); CHKERRQ(ierr); 874 } 875 } 876 else if (flag & SOR_BACKWARD_SWEEP){ 877 if (flag & SOR_ZERO_INITIAL_GUESS) { 878 VecSet(&zero,mat->lvec); 879 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 880 mat->Mvctx); CHKERRQ(ierr); 881 for ( i=m-1; i>-1; i-- ) { 882 n = A->i[i+1] - diag[i] - 1; 883 idx = A->j + diag[i] + !shift; 884 v = A->a + diag[i] + !shift; 885 sum = b[i]; 886 SPARSEDENSEMDOT(sum,xs,v,idx,n); 887 d = fshift + A->a[diag[i]+shift]; 888 n = B->i[i+1] - B->i[i]; 889 idx = B->j + B->i[i] + shift; 890 v = B->a + B->i[i] + shift; 891 SPARSEDENSEMDOT(sum,ls,v,idx,n); 892 x[i] = omega*(sum/d); 893 } 894 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 895 mat->Mvctx); CHKERRQ(ierr); 896 its--; 897 } 898 while (its--) { 899 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 900 mat->Mvctx); CHKERRQ(ierr); 901 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 902 mat->Mvctx); CHKERRQ(ierr); 903 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 904 mat->Mvctx); CHKERRQ(ierr); 905 for ( i=m-1; i>-1; i-- ) { 906 n = A->i[i+1] - A->i[i]; 907 idx = A->j + A->i[i] + shift; 908 v = A->a + A->i[i] + shift; 909 sum = b[i]; 910 SPARSEDENSEMDOT(sum,xs,v,idx,n); 911 d = fshift + A->a[diag[i]+shift]; 912 n = B->i[i+1] - B->i[i]; 913 idx = B->j + B->i[i] + shift; 914 v = B->a + B->i[i] + shift; 915 SPARSEDENSEMDOT(sum,ls,v,idx,n); 916 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 917 } 918 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 919 mat->Mvctx); CHKERRQ(ierr); 920 } 921 } 922 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 923 if (flag & SOR_ZERO_INITIAL_GUESS) { 924 return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx); 925 } 926 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 927 CHKERRQ(ierr); 928 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 929 CHKERRQ(ierr); 930 while (its--) { 931 /* go down through the rows */ 932 for ( i=0; i<m; i++ ) { 933 n = A->i[i+1] - A->i[i]; 934 idx = A->j + A->i[i] + shift; 935 v = A->a + A->i[i] + shift; 936 sum = b[i]; 937 SPARSEDENSEMDOT(sum,xs,v,idx,n); 938 d = fshift + A->a[diag[i]+shift]; 939 n = B->i[i+1] - B->i[i]; 940 idx = B->j + B->i[i] + shift; 941 v = B->a + B->i[i] + shift; 942 SPARSEDENSEMDOT(sum,ls,v,idx,n); 943 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 944 } 945 /* come up through the rows */ 946 for ( i=m-1; i>-1; i-- ) { 947 n = A->i[i+1] - A->i[i]; 948 idx = A->j + A->i[i] + shift; 949 v = A->a + A->i[i] + shift; 950 sum = b[i]; 951 SPARSEDENSEMDOT(sum,xs,v,idx,n); 952 d = fshift + A->a[diag[i]+shift]; 953 n = B->i[i+1] - B->i[i]; 954 idx = B->j + B->i[i] + shift; 955 v = B->a + B->i[i] + shift; 956 SPARSEDENSEMDOT(sum,ls,v,idx,n); 957 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 958 } 959 } 960 } 961 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 962 if (flag & SOR_ZERO_INITIAL_GUESS) { 963 return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx); 964 } 965 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 966 CHKERRQ(ierr); 967 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 968 CHKERRQ(ierr); 969 while (its--) { 970 for ( i=0; i<m; i++ ) { 971 n = A->i[i+1] - A->i[i]; 972 idx = A->j + A->i[i] + shift; 973 v = A->a + A->i[i] + shift; 974 sum = b[i]; 975 SPARSEDENSEMDOT(sum,xs,v,idx,n); 976 d = fshift + A->a[diag[i]+shift]; 977 n = B->i[i+1] - B->i[i]; 978 idx = B->j + B->i[i] + shift; 979 v = B->a + B->i[i] + shift; 980 SPARSEDENSEMDOT(sum,ls,v,idx,n); 981 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 982 } 983 } 984 } 985 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 986 if (flag & SOR_ZERO_INITIAL_GUESS) { 987 return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx); 988 } 989 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 990 mat->Mvctx); CHKERRQ(ierr); 991 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 992 mat->Mvctx); CHKERRQ(ierr); 993 while (its--) { 994 for ( i=m-1; i>-1; i-- ) { 995 n = A->i[i+1] - A->i[i]; 996 idx = A->j + A->i[i] + shift; 997 v = A->a + A->i[i] + shift; 998 sum = b[i]; 999 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1000 d = fshift + A->a[diag[i]+shift]; 1001 n = B->i[i+1] - B->i[i]; 1002 idx = B->j + B->i[i] + shift; 1003 v = B->a + B->i[i] + shift; 1004 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1005 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1006 } 1007 } 1008 } 1009 return 0; 1010 } 1011 1012 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1013 int *nzalloc,int *mem) 1014 { 1015 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1016 Mat A = mat->A, B = mat->B; 1017 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1018 1019 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1020 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1021 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1022 if (flag == MAT_LOCAL) { 1023 if (nz) *nz = isend[0]; 1024 if (nzalloc) *nzalloc = isend[1]; 1025 if (mem) *mem = isend[2]; 1026 } else if (flag == MAT_GLOBAL_MAX) { 1027 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1028 if (nz) *nz = irecv[0]; 1029 if (nzalloc) *nzalloc = irecv[1]; 1030 if (mem) *mem = irecv[2]; 1031 } else if (flag == MAT_GLOBAL_SUM) { 1032 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1033 if (nz) *nz = irecv[0]; 1034 if (nzalloc) *nzalloc = irecv[1]; 1035 if (mem) *mem = irecv[2]; 1036 } 1037 return 0; 1038 } 1039 1040 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1041 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1042 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1043 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1044 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1045 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1046 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1047 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1048 1049 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1050 { 1051 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1052 1053 if (op == NO_NEW_NONZERO_LOCATIONS || 1054 op == YES_NEW_NONZERO_LOCATIONS || 1055 op == COLUMNS_SORTED || 1056 op == ROW_ORIENTED) { 1057 MatSetOption(a->A,op); 1058 MatSetOption(a->B,op); 1059 } 1060 else if (op == ROWS_SORTED || 1061 op == SYMMETRIC_MATRIX || 1062 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1063 op == YES_NEW_DIAGONALS) 1064 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1065 else if (op == COLUMN_ORIENTED) { 1066 a->roworiented = 0; 1067 MatSetOption(a->A,op); 1068 MatSetOption(a->B,op); 1069 } 1070 else if (op == NO_NEW_DIAGONALS) 1071 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1072 else 1073 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1074 return 0; 1075 } 1076 1077 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1078 { 1079 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080 *m = mat->M; *n = mat->N; 1081 return 0; 1082 } 1083 1084 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1085 { 1086 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1087 *m = mat->m; *n = mat->N; 1088 return 0; 1089 } 1090 1091 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1092 { 1093 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1094 *m = mat->rstart; *n = mat->rend; 1095 return 0; 1096 } 1097 1098 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1099 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1100 1101 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1105 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1106 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1107 int *cmap, *idx_p; 1108 1109 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1110 mat->getrowactive = PETSC_TRUE; 1111 1112 if (!mat->rowvalues && (idx || v)) { 1113 /* 1114 allocate enough space to hold information from the longest row. 1115 */ 1116 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1117 int max = 1,n = mat->n,tmp; 1118 for ( i=0; i<n; i++ ) { 1119 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1120 if (max < tmp) { max = tmp; } 1121 } 1122 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1123 CHKPTRQ(mat->rowvalues); 1124 mat->rowindices = (int *) (mat->rowvalues + max); 1125 } 1126 1127 1128 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1129 lrow = row - rstart; 1130 1131 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1132 if (!v) {pvA = 0; pvB = 0;} 1133 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1134 ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1135 ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1136 nztot = nzA + nzB; 1137 1138 cmap = mat->garray; 1139 if (v || idx) { 1140 if (nztot) { 1141 /* Sort by increasing column numbers, assuming A and B already sorted */ 1142 int imark = -1; 1143 if (v) { 1144 *v = v_p = mat->rowvalues; 1145 for ( i=0; i<nzB; i++ ) { 1146 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1147 else break; 1148 } 1149 imark = i; 1150 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1151 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1152 } 1153 if (idx) { 1154 *idx = idx_p = mat->rowindices; 1155 if (imark > -1) { 1156 for ( i=0; i<imark; i++ ) { 1157 idx_p[i] = cmap[cworkB[i]]; 1158 } 1159 } else { 1160 for ( i=0; i<nzB; i++ ) { 1161 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1162 else break; 1163 } 1164 imark = i; 1165 } 1166 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1167 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1168 } 1169 } 1170 else {*idx = 0; *v=0;} 1171 } 1172 *nz = nztot; 1173 ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1174 ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1175 return 0; 1176 } 1177 1178 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1179 { 1180 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1181 if (aij->getrowactive == PETSC_FALSE) { 1182 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1183 } 1184 aij->getrowactive = PETSC_FALSE; 1185 return 0; 1186 } 1187 1188 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1189 { 1190 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1191 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1192 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1193 double sum = 0.0; 1194 Scalar *v; 1195 1196 if (aij->size == 1) { 1197 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1198 } else { 1199 if (type == NORM_FROBENIUS) { 1200 v = amat->a; 1201 for (i=0; i<amat->nz; i++ ) { 1202 #if defined(PETSC_COMPLEX) 1203 sum += real(conj(*v)*(*v)); v++; 1204 #else 1205 sum += (*v)*(*v); v++; 1206 #endif 1207 } 1208 v = bmat->a; 1209 for (i=0; i<bmat->nz; i++ ) { 1210 #if defined(PETSC_COMPLEX) 1211 sum += real(conj(*v)*(*v)); v++; 1212 #else 1213 sum += (*v)*(*v); v++; 1214 #endif 1215 } 1216 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1217 *norm = sqrt(*norm); 1218 } 1219 else if (type == NORM_1) { /* max column norm */ 1220 double *tmp, *tmp2; 1221 int *jj, *garray = aij->garray; 1222 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1223 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1224 PetscMemzero(tmp,aij->N*sizeof(double)); 1225 *norm = 0.0; 1226 v = amat->a; jj = amat->j; 1227 for ( j=0; j<amat->nz; j++ ) { 1228 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1229 } 1230 v = bmat->a; jj = bmat->j; 1231 for ( j=0; j<bmat->nz; j++ ) { 1232 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1233 } 1234 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1235 for ( j=0; j<aij->N; j++ ) { 1236 if (tmp2[j] > *norm) *norm = tmp2[j]; 1237 } 1238 PetscFree(tmp); PetscFree(tmp2); 1239 } 1240 else if (type == NORM_INFINITY) { /* max row norm */ 1241 double ntemp = 0.0; 1242 for ( j=0; j<amat->m; j++ ) { 1243 v = amat->a + amat->i[j] + shift; 1244 sum = 0.0; 1245 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1246 sum += PetscAbsScalar(*v); v++; 1247 } 1248 v = bmat->a + bmat->i[j] + shift; 1249 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1250 sum += PetscAbsScalar(*v); v++; 1251 } 1252 if (sum > ntemp) ntemp = sum; 1253 } 1254 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1255 } 1256 else { 1257 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1258 } 1259 } 1260 return 0; 1261 } 1262 1263 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1264 { 1265 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1266 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1267 int ierr,shift = Aloc->indexshift; 1268 Mat B; 1269 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1270 Scalar *array; 1271 1272 if (matout == PETSC_NULL && M != N) 1273 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1274 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1275 PETSC_NULL,&B); CHKERRQ(ierr); 1276 1277 /* copy over the A part */ 1278 Aloc = (Mat_SeqAIJ*) a->A->data; 1279 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1280 row = a->rstart; 1281 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1282 for ( i=0; i<m; i++ ) { 1283 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1284 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1285 } 1286 aj = Aloc->j; 1287 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1288 1289 /* copy over the B part */ 1290 Aloc = (Mat_SeqAIJ*) a->B->data; 1291 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1292 row = a->rstart; 1293 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1294 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1295 for ( i=0; i<m; i++ ) { 1296 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1297 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1298 } 1299 PetscFree(ct); 1300 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1301 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1302 if (matout != PETSC_NULL) { 1303 *matout = B; 1304 } else { 1305 /* This isn't really an in-place transpose .... but free data structures from a */ 1306 PetscFree(a->rowners); 1307 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1308 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1309 if (a->colmap) PetscFree(a->colmap); 1310 if (a->garray) PetscFree(a->garray); 1311 if (a->lvec) VecDestroy(a->lvec); 1312 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1313 PetscFree(a); 1314 PetscMemcpy(A,B,sizeof(struct _Mat)); 1315 PetscHeaderDestroy(B); 1316 } 1317 return 0; 1318 } 1319 1320 extern int MatPrintHelp_SeqAIJ(Mat); 1321 static int MatPrintHelp_MPIAIJ(Mat A) 1322 { 1323 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1324 1325 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1326 else return 0; 1327 } 1328 1329 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1330 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1331 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1332 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1333 /* -------------------------------------------------------------------*/ 1334 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1335 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1336 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1337 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1338 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1339 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1340 MatLUFactor_MPIAIJ,0, 1341 MatRelax_MPIAIJ, 1342 MatTranspose_MPIAIJ, 1343 MatGetInfo_MPIAIJ,0, 1344 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1345 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1346 0, 1347 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1348 MatGetReordering_MPIAIJ, 1349 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1350 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1351 MatILUFactorSymbolic_MPIAIJ,0, 1352 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1353 0,0,0, 1354 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1355 MatPrintHelp_MPIAIJ, 1356 MatScale_MPIAIJ}; 1357 1358 /*@C 1359 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1360 (the default parallel PETSc format). For good matrix assembly performance 1361 the user should preallocate the matrix storage by setting the parameters 1362 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1363 performance can be increased by more than a factor of 50. 1364 1365 Input Parameters: 1366 . comm - MPI communicator 1367 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1368 . n - number of local columns (or PETSC_DECIDE to have calculated 1369 if N is given) 1370 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1371 . N - number of global columns (or PETSC_DECIDE to have calculated 1372 if n is given) 1373 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1374 (same for all local rows) 1375 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1376 or null (possibly different for each row). You must leave room 1377 for the diagonal entry even if it is zero. 1378 . o_nz - number of nonzeros per row in off-diagonal portion of local 1379 submatrix (same for all local rows). 1380 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1381 submatrix or null (possibly different for each row). 1382 1383 Output Parameter: 1384 . A - the matrix 1385 1386 Notes: 1387 The AIJ format (also called the Yale sparse matrix format or 1388 compressed row storage), is fully compatible with standard Fortran 77 1389 storage. That is, the stored row and column indices can begin at 1390 either one (as in Fortran) or zero. See the users manual for details. 1391 1392 The user MUST specify either the local or global matrix dimensions 1393 (possibly both). 1394 1395 By default, this format uses inodes (identical nodes) when possible. 1396 We search for consecutive rows with the same nonzero structure, thereby 1397 reusing matrix information to achieve increased efficiency. 1398 1399 Options Database Keys: 1400 $ -mat_aij_no_inode - Do not use inodes 1401 $ -mat_aij_inode_limit <limit> - Set inode limit. 1402 $ (max limit=5) 1403 $ -mat_aij_oneindex - Internally use indexing starting at 1 1404 $ rather than 0. Note: When calling MatSetValues(), 1405 $ the user still MUST index entries starting at 0! 1406 1407 Storage Information: 1408 For a square global matrix we define each processor's diagonal portion 1409 to be its local rows and the corresponding columns (a square submatrix); 1410 each processor's off-diagonal portion encompasses the remainder of the 1411 local matrix (a rectangular submatrix). 1412 1413 The user can specify preallocated storage for the diagonal part of 1414 the local submatrix with either d_nz or d_nnz (not both). Set 1415 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1416 memory allocation. Likewise, specify preallocated storage for the 1417 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1418 1419 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1420 the figure below we depict these three local rows and all columns (0-11). 1421 1422 $ 0 1 2 3 4 5 6 7 8 9 10 11 1423 $ ------------------- 1424 $ row 3 | o o o d d d o o o o o o 1425 $ row 4 | o o o d d d o o o o o o 1426 $ row 5 | o o o d d d o o o o o o 1427 $ ------------------- 1428 $ 1429 1430 Thus, any entries in the d locations are stored in the d (diagonal) 1431 submatrix, and any entries in the o locations are stored in the 1432 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1433 stored simply in the MATSEQAIJ format for compressed row storage. 1434 1435 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1436 and o_nz should indicate the number of nonzeros per row in the o matrix. 1437 In general, for PDE problems in which most nonzeros are near the diagonal, 1438 one expects d_nz >> o_nz. For additional details, see the users manual 1439 chapter on matrices and the file $(PETSC_DIR)/Performance. 1440 1441 .keywords: matrix, aij, compressed row, sparse, parallel 1442 1443 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1444 @*/ 1445 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1446 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1447 { 1448 Mat B; 1449 Mat_MPIAIJ *b; 1450 int ierr, i,sum[2],work[2]; 1451 1452 *A = 0; 1453 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1454 PLogObjectCreate(B); 1455 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1456 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1457 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1458 B->destroy = MatDestroy_MPIAIJ; 1459 B->view = MatView_MPIAIJ; 1460 B->factor = 0; 1461 B->assembled = PETSC_FALSE; 1462 1463 b->insertmode = NOT_SET_VALUES; 1464 MPI_Comm_rank(comm,&b->rank); 1465 MPI_Comm_size(comm,&b->size); 1466 1467 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1468 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1469 1470 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1471 work[0] = m; work[1] = n; 1472 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1473 if (M == PETSC_DECIDE) M = sum[0]; 1474 if (N == PETSC_DECIDE) N = sum[1]; 1475 } 1476 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1477 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1478 b->m = m; B->m = m; 1479 b->n = n; B->n = n; 1480 b->N = N; B->N = N; 1481 b->M = M; B->M = M; 1482 1483 /* build local table of row and column ownerships */ 1484 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1485 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1486 b->cowners = b->rowners + b->size + 1; 1487 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1488 b->rowners[0] = 0; 1489 for ( i=2; i<=b->size; i++ ) { 1490 b->rowners[i] += b->rowners[i-1]; 1491 } 1492 b->rstart = b->rowners[b->rank]; 1493 b->rend = b->rowners[b->rank+1]; 1494 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1495 b->cowners[0] = 0; 1496 for ( i=2; i<=b->size; i++ ) { 1497 b->cowners[i] += b->cowners[i-1]; 1498 } 1499 b->cstart = b->cowners[b->rank]; 1500 b->cend = b->cowners[b->rank+1]; 1501 1502 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1503 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1504 PLogObjectParent(B,b->A); 1505 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1506 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1507 PLogObjectParent(B,b->B); 1508 1509 /* build cache for off array entries formed */ 1510 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1511 b->colmap = 0; 1512 b->garray = 0; 1513 b->roworiented = 1; 1514 1515 /* stuff used for matrix vector multiply */ 1516 b->lvec = 0; 1517 b->Mvctx = 0; 1518 1519 /* stuff for MatGetRow() */ 1520 b->rowindices = 0; 1521 b->rowvalues = 0; 1522 b->getrowactive = PETSC_FALSE; 1523 1524 *A = B; 1525 return 0; 1526 } 1527 1528 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1529 { 1530 Mat mat; 1531 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1532 int ierr, len=0, flg; 1533 1534 *newmat = 0; 1535 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1536 PLogObjectCreate(mat); 1537 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1538 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1539 mat->destroy = MatDestroy_MPIAIJ; 1540 mat->view = MatView_MPIAIJ; 1541 mat->factor = matin->factor; 1542 mat->assembled = PETSC_TRUE; 1543 1544 a->m = mat->m = oldmat->m; 1545 a->n = mat->n = oldmat->n; 1546 a->M = mat->M = oldmat->M; 1547 a->N = mat->N = oldmat->N; 1548 1549 a->rstart = oldmat->rstart; 1550 a->rend = oldmat->rend; 1551 a->cstart = oldmat->cstart; 1552 a->cend = oldmat->cend; 1553 a->size = oldmat->size; 1554 a->rank = oldmat->rank; 1555 a->insertmode = NOT_SET_VALUES; 1556 a->rowvalues = 0; 1557 a->getrowactive = PETSC_FALSE; 1558 1559 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1560 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1561 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1562 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1563 if (oldmat->colmap) { 1564 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1565 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1566 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1567 } else a->colmap = 0; 1568 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1569 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1570 PLogObjectMemory(mat,len*sizeof(int)); 1571 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1572 } else a->garray = 0; 1573 1574 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1575 PLogObjectParent(mat,a->lvec); 1576 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1577 PLogObjectParent(mat,a->Mvctx); 1578 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1579 PLogObjectParent(mat,a->A); 1580 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1581 PLogObjectParent(mat,a->B); 1582 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1583 if (flg) { 1584 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1585 } 1586 *newmat = mat; 1587 return 0; 1588 } 1589 1590 #include "sys.h" 1591 1592 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1593 { 1594 Mat A; 1595 int i, nz, ierr, j,rstart, rend, fd; 1596 Scalar *vals,*svals; 1597 MPI_Comm comm = ((PetscObject)viewer)->comm; 1598 MPI_Status status; 1599 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1600 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1601 int tag = ((PetscObject)viewer)->tag; 1602 1603 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1604 if (!rank) { 1605 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1606 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1607 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1608 } 1609 1610 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1611 M = header[1]; N = header[2]; 1612 /* determine ownership of all rows */ 1613 m = M/size + ((M % size) > rank); 1614 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1615 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1616 rowners[0] = 0; 1617 for ( i=2; i<=size; i++ ) { 1618 rowners[i] += rowners[i-1]; 1619 } 1620 rstart = rowners[rank]; 1621 rend = rowners[rank+1]; 1622 1623 /* distribute row lengths to all processors */ 1624 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1625 offlens = ourlens + (rend-rstart); 1626 if (!rank) { 1627 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1628 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1629 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1630 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1631 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1632 PetscFree(sndcounts); 1633 } 1634 else { 1635 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1636 } 1637 1638 if (!rank) { 1639 /* calculate the number of nonzeros on each processor */ 1640 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1641 PetscMemzero(procsnz,size*sizeof(int)); 1642 for ( i=0; i<size; i++ ) { 1643 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1644 procsnz[i] += rowlengths[j]; 1645 } 1646 } 1647 PetscFree(rowlengths); 1648 1649 /* determine max buffer needed and allocate it */ 1650 maxnz = 0; 1651 for ( i=0; i<size; i++ ) { 1652 maxnz = PetscMax(maxnz,procsnz[i]); 1653 } 1654 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1655 1656 /* read in my part of the matrix column indices */ 1657 nz = procsnz[0]; 1658 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1659 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1660 1661 /* read in every one elses and ship off */ 1662 for ( i=1; i<size; i++ ) { 1663 nz = procsnz[i]; 1664 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1665 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1666 } 1667 PetscFree(cols); 1668 } 1669 else { 1670 /* determine buffer space needed for message */ 1671 nz = 0; 1672 for ( i=0; i<m; i++ ) { 1673 nz += ourlens[i]; 1674 } 1675 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1676 1677 /* receive message of column indices*/ 1678 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1679 MPI_Get_count(&status,MPI_INT,&maxnz); 1680 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1681 } 1682 1683 /* loop over local rows, determining number of off diagonal entries */ 1684 PetscMemzero(offlens,m*sizeof(int)); 1685 jj = 0; 1686 for ( i=0; i<m; i++ ) { 1687 for ( j=0; j<ourlens[i]; j++ ) { 1688 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1689 jj++; 1690 } 1691 } 1692 1693 /* create our matrix */ 1694 for ( i=0; i<m; i++ ) { 1695 ourlens[i] -= offlens[i]; 1696 } 1697 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1698 A = *newmat; 1699 MatSetOption(A,COLUMNS_SORTED); 1700 for ( i=0; i<m; i++ ) { 1701 ourlens[i] += offlens[i]; 1702 } 1703 1704 if (!rank) { 1705 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1706 1707 /* read in my part of the matrix numerical values */ 1708 nz = procsnz[0]; 1709 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1710 1711 /* insert into matrix */ 1712 jj = rstart; 1713 smycols = mycols; 1714 svals = vals; 1715 for ( i=0; i<m; i++ ) { 1716 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1717 smycols += ourlens[i]; 1718 svals += ourlens[i]; 1719 jj++; 1720 } 1721 1722 /* read in other processors and ship out */ 1723 for ( i=1; i<size; i++ ) { 1724 nz = procsnz[i]; 1725 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1726 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1727 } 1728 PetscFree(procsnz); 1729 } 1730 else { 1731 /* receive numeric values */ 1732 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1733 1734 /* receive message of values*/ 1735 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1736 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1737 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1738 1739 /* insert into matrix */ 1740 jj = rstart; 1741 smycols = mycols; 1742 svals = vals; 1743 for ( i=0; i<m; i++ ) { 1744 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1745 smycols += ourlens[i]; 1746 svals += ourlens[i]; 1747 jj++; 1748 } 1749 } 1750 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1751 1752 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1753 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1754 return 0; 1755 } 1756