#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.70 1995/08/24 22:28:22 bsmith Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/*
   CreateColmap_Private - Builds a mapping from global column numbers to
   local column numbers in the off-diagonal part (aij->B) of the local
   storage of the matrix.  colmap[gcol] holds local column + 1, so that a
   value of 0 means "global column gcol does not appear in B".
   Note: this is done in a non-scalable way, since the length of colmap
   equals the global number of columns of the matrix.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
  int        n = B->n,i;
  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
  /* store local index + 1; garray[i] is the global column of local col i */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/*
   MatGetReordering_MPIAIJ - Orderings are supported only in the
   uniprocessor case, where the request is delegated to the sequential
   diagonal block aij->A.
*/
static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else
    SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel");
  return 0;
}

/*
   MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values.
   Locally owned rows are routed to the diagonal block (aij->A) or the
   off-diagonal block (aij->B); rows owned by other processors are stashed
   and communicated during assembly.  Insert and add modes may not be mixed
   between assemblies.
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  /* all values set between two assemblies must use the same insert mode */
  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* row is owned locally */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          /* column lies in the off-diagonal block */
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            /* a brand new off-diagonal nonzero location forces the
               matrix to be disassembled before it can be reassembled */
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* row belongs to another processor: stash for assembly-time send */
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

/*
    the assembly code is a lot like the code for vectors, we should
    sometime derive a single assembly code that can be used for
    either case.
*/

/*
   MatAssemblyBegin_MPIAIJ - Starts assembly: posts the nonblocking
   receives and sends that move stashed (row,col,value) triples to the
   processors that own the rows.  Completed by MatAssemblyEnd_MPIAIJ.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC(
(aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 114 for ( i=0; i<aij->stash.n; i++ ) { 115 idx = aij->stash.idx[i]; 116 for ( j=0; j<numtids; j++ ) { 117 if (idx >= owners[j] && idx < owners[j+1]) { 118 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 119 } 120 } 121 } 122 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 123 124 /* inform other processors of number of messages and max length*/ 125 work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 126 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 127 nreceives = work[mytid]; 128 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 129 nmax = work[mytid]; 130 PETSCFREE(work); 131 132 /* post receives: 133 1) each message will consist of ordered pairs 134 (global index,value) we store the global index as a double 135 to simplify the message passing. 136 2) since we don't know how long each individual message is we 137 allocate the largest needed buffer for each receive. Potentially 138 this is a lot of wasted space. 139 140 141 This could be done better. 
142 */ 143 rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 144 CHKPTRQ(rvalues); 145 recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 146 CHKPTRQ(recv_waits); 147 for ( i=0; i<nreceives; i++ ) { 148 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 149 comm,recv_waits+i); 150 } 151 152 /* do sends: 153 1) starts[i] gives the starting index in svalues for stuff going to 154 the ith processor 155 */ 156 svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 157 CHKPTRQ(svalues); 158 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 159 CHKPTRQ(send_waits); 160 starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts); 161 starts[0] = 0; 162 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 163 for ( i=0; i<aij->stash.n; i++ ) { 164 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 165 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 166 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 167 } 168 PETSCFREE(owner); 169 starts[0] = 0; 170 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 171 count = 0; 172 for ( i=0; i<numtids; i++ ) { 173 if (procs[i]) { 174 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 175 comm,send_waits+count++); 176 } 177 } 178 PETSCFREE(starts); PETSCFREE(nprocs); 179 180 /* Free cache space */ 181 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 182 183 aij->svalues = svalues; aij->rvalues = rvalues; 184 aij->nsends = nsends; aij->nrecvs = nreceives; 185 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 186 aij->rmax = nmax; 187 188 return 0; 189 } 190 extern int MatSetUpMultiply_MPIAIJ(Mat); 191 192 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 193 { 194 int ierr; 195 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 196 MPI_Status *send_status,recv_status; 197 int imdex,nrecvs = aij->nrecvs, count 
= nrecvs, i, n; 198 int row,col,other_disassembled; 199 Scalar *values,val; 200 InsertMode addv = aij->insertmode; 201 202 /* wait on receives */ 203 while (count) { 204 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 205 /* unpack receives into our local space */ 206 values = aij->rvalues + 3*imdex*aij->rmax; 207 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 208 n = n/3; 209 for ( i=0; i<n; i++ ) { 210 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 211 col = (int) PETSCREAL(values[3*i+1]); 212 val = values[3*i+2]; 213 if (col >= aij->cstart && col < aij->cend) { 214 col -= aij->cstart; 215 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 216 } 217 else { 218 if (aij->assembled) { 219 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 220 col = aij->colmap[col] - 1; 221 if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 222 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 223 col = (int) PETSCREAL(values[3*i+1]); 224 } 225 } 226 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 227 } 228 } 229 count--; 230 } 231 PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 232 233 /* wait on sends */ 234 if (aij->nsends) { 235 send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) ); 236 CHKPTRQ(send_status); 237 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 238 PETSCFREE(send_status); 239 } 240 PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 241 242 aij->insertmode = NOTSETVALUES; 243 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 244 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 245 246 /* determine if any processor has disassembled, if so we must 247 also disassemble ourselfs, in order that we may reassemble. 
*/ 248 MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1, 249 MPI_INT,MPI_PROD,mat->comm); 250 if (aij->assembled && !other_disassembled) { 251 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 252 } 253 254 if (!aij->assembled && mode == FINAL_ASSEMBLY) { 255 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 256 aij->assembled = 1; 257 } 258 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 259 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 260 261 return 0; 262 } 263 264 static int MatZeroEntries_MPIAIJ(Mat A) 265 { 266 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 267 int ierr; 268 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 269 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 270 return 0; 271 } 272 273 /* again this uses the same basic stratagy as in the assembly and 274 scatter create routines, we should try to do it systemamatically 275 if we can figure out the proper level of generality. */ 276 277 /* the code does not do the diagonal entries correctly unless the 278 matrix is square and the column and row owerships are identical. 279 This is a BUG. The only way to fix it seems to be to access 280 aij->A and aij->B directly and not through the MatZeroRows() 281 routine. 
282 */ 283 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 284 { 285 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 286 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 287 int *procs,*nprocs,j,found,idx,nsends,*work; 288 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 289 int *rvalues,tag = A->tag,count,base,slen,n,*source; 290 int *lens,imdex,*lrows,*values; 291 MPI_Comm comm = A->comm; 292 MPI_Request *send_waits,*recv_waits; 293 MPI_Status recv_status,*send_status; 294 IS istmp; 295 296 if (!l->assembled) 297 SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first"); 298 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 299 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 300 301 /* first count number of contributors to each processor */ 302 nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 303 PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 304 owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 305 for ( i=0; i<N; i++ ) { 306 idx = rows[i]; 307 found = 0; 308 for ( j=0; j<numtids; j++ ) { 309 if (idx >= owners[j] && idx < owners[j+1]) { 310 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 311 } 312 } 313 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 314 } 315 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 316 317 /* inform other processors of number of messages and max length*/ 318 work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 319 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 320 nrecvs = work[mytid]; 321 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 322 nmax = work[mytid]; 323 PETSCFREE(work); 324 325 /* post receives: */ 326 rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 327 CHKPTRQ(rvalues); 328 recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 329 CHKPTRQ(recv_waits); 330 for ( i=0; i<nrecvs; 
i++ ) { 331 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 332 comm,recv_waits+i); 333 } 334 335 /* do sends: 336 1) starts[i] gives the starting index in svalues for stuff going to 337 the ith processor 338 */ 339 svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 340 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 341 CHKPTRQ(send_waits); 342 starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 343 starts[0] = 0; 344 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 345 for ( i=0; i<N; i++ ) { 346 svalues[starts[owner[i]]++] = rows[i]; 347 } 348 ISRestoreIndices(is,&rows); 349 350 starts[0] = 0; 351 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 352 count = 0; 353 for ( i=0; i<numtids; i++ ) { 354 if (procs[i]) { 355 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 356 comm,send_waits+count++); 357 } 358 } 359 PETSCFREE(starts); 360 361 base = owners[mytid]; 362 363 /* wait on receives */ 364 lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 365 source = lens + nrecvs; 366 count = nrecvs; slen = 0; 367 while (count) { 368 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 369 /* unpack receives into our local space */ 370 MPI_Get_count(&recv_status,MPI_INT,&n); 371 source[imdex] = recv_status.MPI_SOURCE; 372 lens[imdex] = n; 373 slen += n; 374 count--; 375 } 376 PETSCFREE(recv_waits); 377 378 /* move the data into the send scatter */ 379 lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 380 count = 0; 381 for ( i=0; i<nrecvs; i++ ) { 382 values = rvalues + i*nmax; 383 for ( j=0; j<lens[i]; j++ ) { 384 lrows[count++] = values[j] - base; 385 } 386 } 387 PETSCFREE(rvalues); PETSCFREE(lens); 388 PETSCFREE(owner); PETSCFREE(nprocs); 389 390 /* actually zap the local rows */ 391 ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp); 392 PLogObjectParent(A,istmp); 393 
CHKERRQ(ierr); PETSCFREE(lrows); 394 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 395 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 396 ierr = ISDestroy(istmp); CHKERRQ(ierr); 397 398 /* wait on sends */ 399 if (nsends) { 400 send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) ); 401 CHKPTRQ(send_status); 402 MPI_Waitall(nsends,send_waits,send_status); 403 PETSCFREE(send_status); 404 } 405 PETSCFREE(send_waits); PETSCFREE(svalues); 406 407 408 return 0; 409 } 410 411 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 412 { 413 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 414 int ierr; 415 if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 416 ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 417 CHKERRQ(ierr); 418 ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr); 419 ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 420 CHKERRQ(ierr); 421 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr); 422 return 0; 423 } 424 425 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 426 { 427 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 428 int ierr; 429 if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 430 ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 431 CHKERRQ(ierr); 432 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 433 ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 434 CHKERRQ(ierr); 435 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr); 436 return 0; 437 } 438 439 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 440 { 441 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 442 int ierr; 443 444 if (!aij->assembled) 445 SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix first"); 446 /* do nondiagonal part */ 447 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 448 /* send it on its way */ 449 ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES, 450 
(ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 451 /* do local part */ 452 ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr); 453 /* receive remote parts: note this assumes the values are not actually */ 454 /* inserted in yy until the next line, which is true for my implementation*/ 455 /* but is not perhaps always true. */ 456 ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES, 457 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 458 return 0; 459 } 460 461 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 462 { 463 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 464 int ierr; 465 466 if (!aij->assembled) 467 SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix first"); 468 /* do nondiagonal part */ 469 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 470 /* send it on its way */ 471 ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES, 472 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 473 /* do local part */ 474 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 475 /* receive remote parts: note this assumes the values are not actually */ 476 /* inserted in yy until the next line, which is true for my implementation*/ 477 /* but is not perhaps always true. 
*/ 478 ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES, 479 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 480 return 0; 481 } 482 483 /* 484 This only works correctly for square matrices where the subblock A->A is the 485 diagonal block 486 */ 487 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v) 488 { 489 Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 490 if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first"); 491 return MatGetDiagonal(A->A,v); 492 } 493 494 static int MatDestroy_MPIAIJ(PetscObject obj) 495 { 496 Mat mat = (Mat) obj; 497 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 498 int ierr; 499 #if defined(PETSC_LOG) 500 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 501 #endif 502 PETSCFREE(aij->rowners); 503 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 504 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 505 if (aij->colmap) PETSCFREE(aij->colmap); 506 if (aij->garray) PETSCFREE(aij->garray); 507 if (aij->lvec) VecDestroy(aij->lvec); 508 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 509 PETSCFREE(aij); 510 PLogObjectDestroy(mat); 511 PETSCHEADERDESTROY(mat); 512 return 0; 513 } 514 #include "draw.h" 515 #include "pinclude/pviewer.h" 516 517 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 518 { 519 Mat mat = (Mat) obj; 520 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 521 int ierr, format; 522 PetscObject vobj = (PetscObject) viewer; 523 524 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first"); 525 if (!viewer) { /* so that viewers may be used from debuggers */ 526 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 527 } 528 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 529 format = ViewerFileGetFormat_Private(viewer); 530 if (vobj->cookie == VIEWER_COOKIE && format == FILE_FORMAT_INFO && 531 (vobj->type == FILE_VIEWER || vobj->type == FILES_VIEWER)) { 532 /* do nothing for now */ 533 } 534 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) { 
535 FILE *fd = ViewerFileGetPointer_Private(viewer); 536 MPIU_Seq_begin(mat->comm,1); 537 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 538 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 539 aij->cend); 540 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 541 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 542 fflush(fd); 543 MPIU_Seq_end(mat->comm,1); 544 } 545 else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) || 546 vobj->cookie == DRAW_COOKIE) { 547 int numtids = aij->numtids, mytid = aij->mytid; 548 if (numtids == 1) { 549 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 550 } 551 else { 552 /* assemble the entire matrix onto first processor. */ 553 Mat A; 554 Mat_AIJ *Aloc; 555 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 556 Scalar *a; 557 558 if (!mytid) { 559 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 560 } 561 else { 562 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 563 } 564 PLogObjectParent(mat,A); 565 CHKERRQ(ierr); 566 567 /* copy over the A part */ 568 Aloc = (Mat_AIJ*) aij->A->data; 569 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 570 row = aij->rstart; 571 for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;} 572 for ( i=0; i<m; i++ ) { 573 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES); 574 CHKERRQ(ierr); 575 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 576 } 577 aj = Aloc->j; 578 for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;} 579 580 /* copy over the B part */ 581 Aloc = (Mat_AIJ*) aij->B->data; 582 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 583 row = aij->rstart; 584 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 585 for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];} 586 for ( i=0; i<m; i++ ) { 587 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES); 588 CHKERRQ(ierr); 589 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 590 } 591 PETSCFREE(ct); 592 ierr = 
MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      /* only the first processor actually views the gathered matrix */
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

extern int MatMarkDiag_AIJ(Mat);
/*
    This has to provide several versions.

     1) per sequential
     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
*/
/*
   MatRelax_MPIAIJ - SOR/SSOR relaxation sweeps.  The sequential CSR
   arrays of the diagonal block A and off-diagonal block B use 1-based
   (Fortran style) indices, hence the many "- 1" offsets and the shifted
   base pointers xs, ls and ts used with SPARSEDENSEMDOT.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double shift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;      /* likewise shift the ghost-value array */
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not yet supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D; where L is lower trianglar,
    U is upper triangular, E is diagonal; This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the case of SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n   = A->i[i+1] - diag[i] - 1;  /* strictly upper triangular part */
      idx = A->j + diag[i];
      v   = A->a + diag[i];
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d   = shift + A->a[diag[i]-1];  /* (shifted) diagonal entry */
      n   = B->i[i+1] - B->i[i];      /* entire off-diagonal row */
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n   = diag[i] - A->i[i];        /* strictly lower triangular part */
      idx = A->j + A->i[i] - 1;
      v   = A->a + A->i[i] - 1;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d   = shift + A->a[diag[i]-1];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* bring the ghost values up to date before the first sweep */
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];    /* entire local row */
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = diag[i] - A->i[i];      /* strictly lower part: x above is 0 */
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - diag[i] - 1; /* strictly upper part: x below is 0 */
        idx = A->j + diag[i];
        v   = A->a + diag[i];
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* ghost values are zero, so the sweep is purely local */
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* local smoothing: ghost values are updated only once */
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1.
- omega)*x[i] + omega*(sum/d + x[i]); 924 } 925 } 926 } 927 return 0; 928 } 929 930 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 931 int *nzalloc,int *mem) 932 { 933 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 934 Mat A = mat->A, B = mat->B; 935 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 936 937 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 938 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 939 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 940 if (flag == MAT_LOCAL) { 941 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 942 } else if (flag == MAT_GLOBAL_MAX) { 943 MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm); 944 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 945 } else if (flag == MAT_GLOBAL_SUM) { 946 MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm); 947 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 948 } 949 return 0; 950 } 951 952 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 953 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 954 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 955 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 956 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 957 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 958 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 959 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 960 961 static int MatSetOption_MPIAIJ(Mat aijin,MatOption op) 962 { 963 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 964 965 if (op == NO_NEW_NONZERO_LOCATIONS) { 966 MatSetOption(aij->A,op); 967 MatSetOption(aij->B,op); 968 } 969 else if (op == YES_NEW_NONZERO_LOCATIONS) { 970 MatSetOption(aij->A,op); 971 MatSetOption(aij->B,op); 972 } 973 else if (op == COLUMN_ORIENTED) 974 SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported"); 975 return 0; 976 } 977 978 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 979 { 980 
Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 981 *m = mat->M; *n = mat->N; 982 return 0; 983 } 984 985 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 986 { 987 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 988 *m = mat->m; *n = mat->N; 989 return 0; 990 } 991 992 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 993 { 994 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 995 *m = mat->rstart; *n = mat->rend; 996 return 0; 997 } 998 999 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1000 { 1001 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1002 Scalar *vworkA, *vworkB, **pvA, **pvB; 1003 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1004 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1005 1006 if (row < rstart || row >= rend) 1007 SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows") 1008 lrow = row - rstart; 1009 1010 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1011 if (!v) {pvA = 0; pvB = 0;} 1012 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1013 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1014 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1015 nztot = nzA + nzB; 1016 1017 if (v || idx) { 1018 if (nztot) { 1019 /* Sort by increasing column numbers, assuming A and B already sorted */ 1020 int imark; 1021 if (mat->assembled) { 1022 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1023 } 1024 if (v) { 1025 *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1026 for ( i=0; i<nzB; i++ ) { 1027 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1028 else break; 1029 } 1030 imark = i; 1031 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1032 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1033 } 1034 if (idx) { 1035 *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1036 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1037 for ( i=0; i<nzB; i++ ) { 1038 if (cworkB[i] < cstart) (*idx)[i] = 
cworkB[i]; 1039 else break; 1040 } 1041 imark = i; 1042 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1043 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1044 } 1045 } 1046 else {*idx = 0; *v=0;} 1047 } 1048 *nz = nztot; 1049 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1050 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1051 return 0; 1052 } 1053 1054 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1055 { 1056 if (idx) PETSCFREE(*idx); 1057 if (v) PETSCFREE(*v); 1058 return 0; 1059 } 1060 1061 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1062 { 1063 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1064 int ierr; 1065 if (aij->numtids == 1) { 1066 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1067 } else { 1068 SETERRQ(1,"MatNorm_MPIAIJ: no parallel norms yet."); 1069 } 1070 return 0; 1071 } 1072 1073 static int MatTranspose_MPIAIJ(Mat A,Mat *Bin) 1074 { 1075 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1076 int ierr; 1077 Mat B; 1078 Mat_AIJ *Aloc; 1079 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1080 Scalar *array; 1081 1082 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1083 CHKERRQ(ierr); 1084 1085 /* copy over the A part */ 1086 Aloc = (Mat_AIJ*) a->A->data; 1087 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1088 row = a->rstart; 1089 for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;} 1090 for ( i=0; i<m; i++ ) { 1091 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES); 1092 CHKERRQ(ierr); 1093 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1094 } 1095 aj = Aloc->j; 1096 for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;} 1097 1098 /* copy over the B part */ 1099 Aloc = (Mat_AIJ*) a->B->data; 1100 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1101 row = a->rstart; 1102 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1103 for ( i=0; i<ai[m]-1; i++ ) {cols[i] = 
a->garray[aj[i]-1];} 1104 for ( i=0; i<m; i++ ) { 1105 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES); 1106 CHKERRQ(ierr); 1107 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1108 } 1109 PETSCFREE(ct); 1110 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1111 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1112 *Bin = B; 1113 return 0; 1114 } 1115 1116 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1117 static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1118 1119 /* -------------------------------------------------------------------*/ 1120 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1121 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1122 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1123 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1124 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1125 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1126 MatLUFactor_MPIAIJ,0, 1127 MatRelax_MPIAIJ, 1128 MatTranspose_MPIAIJ, 1129 MatGetInfo_MPIAIJ,0, 1130 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1131 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1132 0, 1133 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1134 MatGetReordering_MPIAIJ, 1135 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1136 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1137 MatILUFactorSymbolic_MPIAIJ,0, 1138 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 1139 1140 /*@ 1141 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1142 (the default parallel PETSc format). 1143 1144 Input Parameters: 1145 . comm - MPI communicator 1146 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1147 . n - number of local columns (or PETSC_DECIDE to have calculated 1148 if N is given) 1149 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1150 . N - number of global columns (or PETSC_DECIDE to have calculated 1151 if n is given) 1152 . 
d_nz - number of nonzeros per row in diagonal portion of local submatrix 1153 (same for all local rows) 1154 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1155 or null (possibly different for each row). You must leave room 1156 for the diagonal entry even if it is zero. 1157 . o_nz - number of nonzeros per row in off-diagonal portion of local 1158 submatrix (same for all local rows). 1159 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1160 submatrix or null (possibly different for each row). 1161 1162 Output Parameter: 1163 . newmat - the matrix 1164 1165 Notes: 1166 The AIJ format (also called the Yale sparse matrix format or 1167 compressed row storage), is fully compatible with standard Fortran 77 1168 storage. That is, the stored row and column indices begin at 1169 one, not zero. See the users manual for further details. 1170 1171 The user MUST specify either the local or global matrix dimensions 1172 (possibly both). 1173 1174 Storage Information: 1175 For a square global matrix we define each processor's diagonal portion 1176 to be its local rows and the corresponding columns (a square submatrix); 1177 each processor's off-diagonal portion encompasses the remainder of the 1178 local matrix (a rectangular submatrix). 1179 1180 The user can specify preallocated storage for the diagonal part of 1181 the local submatrix with either d_nz or d_nnz (not both). Set both 1182 d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1183 Likewise, specify preallocated storage for the off-diagonal part of 1184 the local submatrix with o_nz or o_nnz (not both). 
1185 1186 .keywords: matrix, aij, compressed row, sparse, parallel 1187 1188 .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues() 1189 @*/ 1190 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1191 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1192 { 1193 Mat mat; 1194 Mat_MPIAIJ *aij; 1195 int ierr, i,sum[2],work[2]; 1196 *newmat = 0; 1197 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1198 PLogObjectCreate(mat); 1199 mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1200 mat->ops = &MatOps; 1201 mat->destroy = MatDestroy_MPIAIJ; 1202 mat->view = MatView_MPIAIJ; 1203 mat->factor = 0; 1204 1205 aij->insertmode = NOTSETVALUES; 1206 MPI_Comm_rank(comm,&aij->mytid); 1207 MPI_Comm_size(comm,&aij->numtids); 1208 1209 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1210 work[0] = m; work[1] = n; 1211 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1212 if (M == PETSC_DECIDE) M = sum[0]; 1213 if (N == PETSC_DECIDE) N = sum[1]; 1214 } 1215 if (m == PETSC_DECIDE) 1216 {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1217 if (n == PETSC_DECIDE) 1218 {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 1219 aij->m = m; 1220 aij->n = n; 1221 aij->N = N; 1222 aij->M = M; 1223 1224 /* build local table of row and column ownerships */ 1225 aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int)); 1226 CHKPTRQ(aij->rowners); 1227 PLogObjectMemory(mat,2*(aij->numtids+2)*sizeof(int)+sizeof(struct _Mat)+ 1228 sizeof(Mat_MPIAIJ)); 1229 aij->cowners = aij->rowners + aij->numtids + 1; 1230 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 1231 aij->rowners[0] = 0; 1232 for ( i=2; i<=aij->numtids; i++ ) { 1233 aij->rowners[i] += aij->rowners[i-1]; 1234 } 1235 aij->rstart = aij->rowners[aij->mytid]; 1236 aij->rend = aij->rowners[aij->mytid+1]; 1237 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 1238 aij->cowners[0] = 0; 1239 for ( i=2; i<=aij->numtids; i++ ) { 1240 
aij->cowners[i] += aij->cowners[i-1]; 1241 } 1242 aij->cstart = aij->cowners[aij->mytid]; 1243 aij->cend = aij->cowners[aij->mytid+1]; 1244 1245 1246 ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A); 1247 CHKERRQ(ierr); 1248 PLogObjectParent(mat,aij->A); 1249 ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B); 1250 CHKERRQ(ierr); 1251 PLogObjectParent(mat,aij->B); 1252 1253 /* build cache for off array entries formed */ 1254 ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr); 1255 aij->colmap = 0; 1256 aij->garray = 0; 1257 1258 /* stuff used for matrix vector multiply */ 1259 aij->lvec = 0; 1260 aij->Mvctx = 0; 1261 aij->assembled = 0; 1262 1263 *newmat = mat; 1264 return 0; 1265 } 1266 1267 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1268 { 1269 Mat mat; 1270 Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 1271 int ierr, len; 1272 *newmat = 0; 1273 1274 if (!oldmat->assembled) 1275 SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix"); 1276 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1277 PLogObjectCreate(mat); 1278 mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1279 mat->ops = &MatOps; 1280 mat->destroy = MatDestroy_MPIAIJ; 1281 mat->view = MatView_MPIAIJ; 1282 mat->factor = matin->factor; 1283 1284 aij->m = oldmat->m; 1285 aij->n = oldmat->n; 1286 aij->M = oldmat->M; 1287 aij->N = oldmat->N; 1288 1289 aij->assembled = 1; 1290 aij->rstart = oldmat->rstart; 1291 aij->rend = oldmat->rend; 1292 aij->cstart = oldmat->cstart; 1293 aij->cend = oldmat->cend; 1294 aij->numtids = oldmat->numtids; 1295 aij->mytid = oldmat->mytid; 1296 aij->insertmode = NOTSETVALUES; 1297 1298 aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) ); 1299 CHKPTRQ(aij->rowners); 1300 PLogObjectMemory(mat,(aij->numtids+1)*sizeof(int)+sizeof(struct _Mat)+ 1301 sizeof(Mat_MPIAIJ)); 1302 PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1303 ierr = 
StashInitialize_Private(&aij->stash); CHKERRQ(ierr); 1304 if (oldmat->colmap) { 1305 aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) ); 1306 CHKPTRQ(aij->colmap); 1307 PLogObjectMemory(mat,(aij->N)*sizeof(int)); 1308 PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int)); 1309 } else aij->colmap = 0; 1310 if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) { 1311 aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray); 1312 PLogObjectMemory(mat,len*sizeof(int)); 1313 PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int)); 1314 } else aij->garray = 0; 1315 1316 ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr); 1317 PLogObjectParent(mat,aij->lvec); 1318 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr); 1319 PLogObjectParent(mat,aij->Mvctx); 1320 ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr); 1321 PLogObjectParent(mat,aij->A); 1322 ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr); 1323 PLogObjectParent(mat,aij->B); 1324 *newmat = mat; 1325 return 0; 1326 } 1327