#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.87 1995/10/11 17:55:12 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscZero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted 
others added"); 100 } 101 aij->insertmode = addv; /* in case this processor had no cache */ 102 103 /* first count number of contributors to each processor */ 104 nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 105 PetscZero(nprocs,2*numtids*sizeof(int)); procs = nprocs + numtids; 106 owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 107 for ( i=0; i<aij->stash.n; i++ ) { 108 idx = aij->stash.idx[i]; 109 for ( j=0; j<numtids; j++ ) { 110 if (idx >= owners[j] && idx < owners[j+1]) { 111 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 112 } 113 } 114 } 115 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 116 117 /* inform other processors of number of messages and max length*/ 118 work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 119 MPI_Allreduce(procs, work,numtids,MPI_INT,MPI_SUM,comm); 120 nreceives = work[mytid]; 121 MPI_Allreduce( nprocs, work,numtids,MPI_INT,MPI_MAX,comm); 122 nmax = work[mytid]; 123 PETSCFREE(work); 124 125 /* post receives: 126 1) each message will consist of ordered pairs 127 (global index,value) we store the global index as a double 128 to simplify the message passing. 129 2) since we don't know how long each individual message is we 130 allocate the largest needed buffer for each receive. Potentially 131 this is a lot of wasted space. 132 133 134 This could be done better. 135 */ 136 rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 137 CHKPTRQ(rvalues); 138 recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 139 CHKPTRQ(recv_waits); 140 for ( i=0; i<nreceives; i++ ) { 141 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 142 comm,recv_waits+i); 143 } 144 145 /* do sends: 146 1) starts[i] gives the starting index in svalues for stuff going to 147 the ith processor 148 */ 149 svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 150 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 151 CHKPTRQ(send_waits); 152 starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts); 153 starts[0] = 0; 154 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 155 for ( i=0; i<aij->stash.n; i++ ) { 156 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 157 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 158 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 159 } 160 PETSCFREE(owner); 161 starts[0] = 0; 162 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 163 count = 0; 164 for ( i=0; i<numtids; i++ ) { 165 if (procs[i]) { 166 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 167 comm,send_waits+count++); 168 } 169 } 170 PETSCFREE(starts); PETSCFREE(nprocs); 171 172 /* Free cache space */ 173 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 174 175 aij->svalues = svalues; aij->rvalues = rvalues; 176 aij->nsends = nsends; aij->nrecvs = nreceives; 177 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 178 aij->rmax = nmax; 179 180 return 0; 181 } 182 extern int MatSetUpMultiply_MPIAIJ(Mat); 183 184 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 187 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 188 MPI_Status *send_status,recv_status; 189 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 190 int row,col,other_disassembled,shift = C->indexshift; 191 Scalar *values,val; 192 InsertMode addv = aij->insertmode; 
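
  /*
     Each message unpacked below was packed by MatAssemblyBegin_MPIAIJ() as a
     sequence of Scalar triples, one triple per stashed entry:

        values[3*i]   - global row index (stored as a Scalar)
        values[3*i+1] - global column index (stored as a Scalar)
        values[3*i+2] - the numerical value

     so the received message length divided by 3 gives the number of entries.
  */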

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
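   (For example, when the row and column ownership ranges differ, the global
   diagonal entry (i,i) of a locally owned row i may fall in the off-diagonal
   block B, yet MatZeroRows(l->A,...,diag) below can only place the value
   diag on the diagonal of the sequential block A.)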
269 */ 270 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 271 { 272 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 273 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 274 int *procs,*nprocs,j,found,idx,nsends,*work; 275 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 276 int *rvalues,tag = A->tag,count,base,slen,n,*source; 277 int *lens,imdex,*lrows,*values; 278 MPI_Comm comm = A->comm; 279 MPI_Request *send_waits,*recv_waits; 280 MPI_Status recv_status,*send_status; 281 IS istmp; 282 283 if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 284 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 285 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 286 287 /* first count number of contributors to each processor */ 288 nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 289 PetscZero(nprocs,2*numtids*sizeof(int)); procs = nprocs + numtids; 290 owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 291 for ( i=0; i<N; i++ ) { 292 idx = rows[i]; 293 found = 0; 294 for ( j=0; j<numtids; j++ ) { 295 if (idx >= owners[j] && idx < owners[j+1]) { 296 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 297 } 298 } 299 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 300 } 301 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 302 303 /* inform other processors of number of messages and max length*/ 304 work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 305 MPI_Allreduce( procs, work,numtids,MPI_INT,MPI_SUM,comm); 306 nrecvs = work[mytid]; 307 MPI_Allreduce( nprocs, work,numtids,MPI_INT,MPI_MAX,comm); 308 nmax = work[mytid]; 309 PETSCFREE(work); 310 311 /* post receives: */ 312 rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 313 CHKPTRQ(rvalues); 314 recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 315 CHKPTRQ(recv_waits); 316 for ( i=0; i<nrecvs; i++ ) { 317 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 318 } 319 320 /* do sends: 321 1) starts[i] gives the starting index in svalues for stuff going to 322 the ith processor 323 */ 324 svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 325 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 326 CHKPTRQ(send_waits); 327 starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 328 starts[0] = 0; 329 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 330 for ( i=0; i<N; i++ ) { 331 svalues[starts[owner[i]]++] = rows[i]; 332 } 333 ISRestoreIndices(is,&rows); 334 335 starts[0] = 0; 336 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 337 count = 0; 338 for ( i=0; i<numtids; i++ ) { 339 if (procs[i]) { 340 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 341 } 342 } 343 PETSCFREE(starts); 344 345 base = owners[mytid]; 346 347 /* wait on receives */ 348 lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 349 source = lens + nrecvs; 350 count = nrecvs; slen = 0; 351 while (count) { 352 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 353 /* unpack receives into our local space */ 354 MPI_Get_count(&recv_status,MPI_INT,&n); 355 source[imdex] = recv_status.MPI_SOURCE; 356 lens[imdex] = n; 357 slen += n; 358 count--; 359 } 360 PETSCFREE(recv_waits); 361 362 /* move the data into the send scatter */ 363 lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 364 count = 0; 365 for ( i=0; 
i<nrecvs; i++ ) { 366 values = rvalues + i*nmax; 367 for ( j=0; j<lens[i]; j++ ) { 368 lrows[count++] = values[j] - base; 369 } 370 } 371 PETSCFREE(rvalues); PETSCFREE(lens); 372 PETSCFREE(owner); PETSCFREE(nprocs); 373 374 /* actually zap the local rows */ 375 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 376 PLogObjectParent(A,istmp); 377 PETSCFREE(lrows); 378 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 379 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 380 ierr = ISDestroy(istmp); CHKERRQ(ierr); 381 382 /* wait on sends */ 383 if (nsends) { 384 send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status)); 385 CHKPTRQ(send_status); 386 MPI_Waitall(nsends,send_waits,send_status); 387 PETSCFREE(send_status); 388 } 389 PETSCFREE(send_waits); PETSCFREE(svalues); 390 391 return 0; 392 } 393 394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 395 { 396 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 397 int ierr; 398 399 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 400 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 401 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 402 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 403 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 404 return 0; 405 } 406 407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 408 { 409 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 410 int ierr; 411 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 412 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 413 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 414 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 415 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 416 return 0; 417 } 418 419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 420 { 421 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 422 int ierr; 423 424 if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 425 /* do nondiagonal part */ 426 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 427 /* send it on its way */ 428 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 429 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 430 /* do local part */ 431 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 432 /* receive remote parts: note this assumes the values are not actually */ 433 /* inserted in yy until the next line, which is true for my implementation*/ 434 /* but is not perhaps always true. */ 435 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 436 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 437 return 0; 438 } 439 440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 441 { 442 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 443 int ierr; 444 445 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 446 /* do nondiagonal part */ 447 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 448 /* send it on its way */ 449 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 450 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 451 /* do local part */ 452 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 453 /* receive remote parts: note this assumes the values are not actually */ 454 /* inserted in yy until the next line, which is true for my implementation*/ 455 /* but is not perhaps always true. 
*/ 456 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 457 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 458 return 0; 459 } 460 461 /* 462 This only works correctly for square matrices where the subblock A->A is the 463 diagonal block 464 */ 465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 466 { 467 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 468 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 469 return MatGetDiagonal(a->A,v); 470 } 471 472 static int MatDestroy_MPIAIJ(PetscObject obj) 473 { 474 Mat mat = (Mat) obj; 475 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 476 int ierr; 477 #if defined(PETSC_LOG) 478 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 479 #endif 480 PETSCFREE(aij->rowners); 481 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 482 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 483 if (aij->colmap) PETSCFREE(aij->colmap); 484 if (aij->garray) PETSCFREE(aij->garray); 485 if (aij->lvec) VecDestroy(aij->lvec); 486 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 487 PETSCFREE(aij); 488 PLogObjectDestroy(mat); 489 PETSCHEADERDESTROY(mat); 490 return 0; 491 } 492 #include "draw.h" 493 #include "pinclude/pviewer.h" 494 495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 496 { 497 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 498 int ierr; 499 500 if (aij->numtids == 1) { 501 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 502 } 503 return 0; 504 } 505 506 static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer) 507 { 508 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 509 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 510 int ierr, format,shift = C->indexshift; 511 PetscObject vobj = (PetscObject) viewer; 512 FILE *fd; 513 514 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 515 ierr = ViewerFileGetFormat_Private(viewer,&format); 516 if (format == FILE_FORMAT_INFO) { 517 return 0; 518 } 519 } 520 521 if (vobj->type == ASCII_FILE_VIEWER) { 522 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 523 MPIU_Seq_begin(mat->comm,1); 524 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 525 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 526 aij->cend); 527 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 528 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 529 fflush(fd); 530 MPIU_Seq_end(mat->comm,1); 531 } 532 else { 533 int numtids = aij->numtids, mytid = aij->mytid; 534 if (numtids == 1) { 535 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 536 } 537 else { 538 /* assemble the entire matrix onto first processor. 
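      The approach below is: build a temporary MPIAIJ matrix A whose rows all
      live on processor 0, copy the local diagonal and off-diagonal blocks
      into it with MatSetValues() (translating local column numbers back to
      global ones using cstart and garray), assemble it, and then view only
      the sequential block held on processor 0.  This is intended for
      debugging output and is not memory-scalable for large matrices.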
*/ 539 Mat A; 540 Mat_SeqAIJ *Aloc; 541 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 542 Scalar *a; 543 544 if (!mytid) { 545 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 546 } 547 else { 548 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 549 } 550 PLogObjectParent(mat,A); 551 552 553 /* copy over the A part */ 554 Aloc = (Mat_SeqAIJ*) aij->A->data; 555 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 556 row = aij->rstart; 557 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 558 for ( i=0; i<m; i++ ) { 559 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 560 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 561 } 562 aj = Aloc->j; 563 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 564 565 /* copy over the B part */ 566 Aloc = (Mat_SeqAIJ*) aij->B->data; 567 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 568 row = aij->rstart; 569 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 570 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 571 for ( i=0; i<m; i++ ) { 572 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 573 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 574 } 575 PETSCFREE(ct); 576 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 577 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 578 if (!mytid) { 579 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 580 } 581 ierr = MatDestroy(A); CHKERRQ(ierr); 582 } 583 } 584 return 0; 585 } 586 587 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 588 { 589 Mat mat = (Mat) obj; 590 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 591 int ierr; 592 PetscObject vobj = (PetscObject) viewer; 593 594 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 595 if (!viewer) { 596 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 597 } 598 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 599 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 600 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 601 } 602 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 603 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 604 } 605 else if (vobj->cookie == DRAW_COOKIE) { 606 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 607 } 608 else if (vobj->type == BINARY_FILE_VIEWER) { 609 return MatView_MPIAIJ_Binary(mat,viewer); 610 } 611 return 0; 612 } 613 614 extern int MatMarkDiag_SeqAIJ(Mat); 615 /* 616 This has to provide several versions. 617 618 1) per sequential 619 2) a) use only local smoothing updating outer values only once. 620 b) local smoothing updating outer values each inner iteration 621 3) color updating out values betwen colors. 
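
    Note: the sweeps below all apply the same basic update (written here as a
    sketch, assuming SPARSEDENSEMDOT(sum,x,v,idx,n) subtracts the dot product
    of the n row entries v with the vector entries x[idx[]] from sum):

       x[i] <- (1-omega)*x[i] + omega*( x[i] + (b[i] - sum_j a(i,j)*x[j]) / (a(i,i)+fshift) )

    where the local (diagonal-block) part of the row uses the current values
    of x and the off-processor part uses the ghost values gathered into lvec.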
622 */ 623 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 624 double fshift,int its,Vec xx) 625 { 626 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 627 Mat AA = mat->A, BB = mat->B; 628 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 629 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 630 int ierr,*idx, *diag; 631 int n = mat->n, m = mat->m, i,shift = A->indexshift; 632 Vec tt; 633 634 if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 635 636 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 637 xs = x + shift; /* shift by one for index start of 1 */ 638 ls = ls + shift; 639 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 640 diag = A->diag; 641 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 642 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 643 } 644 if (flag & SOR_EISENSTAT) { 645 /* Let A = L + U + D; where L is lower trianglar, 646 U is upper triangular, E is diagonal; This routine applies 647 648 (L + E)^{-1} A (U + E)^{-1} 649 650 to a vector efficiently using Eisenstat's trick. This is for 651 the case of SSOR preconditioner, so E is D/omega where omega 652 is the relaxation factor. 653 */ 654 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 655 PLogObjectParent(matin,tt); 656 VecGetArray(tt,&t); 657 scale = (2.0/omega) - 1.0; 658 /* x = (E + U)^{-1} b */ 659 VecSet(&zero,mat->lvec); 660 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 661 mat->Mvctx); CHKERRQ(ierr); 662 for ( i=m-1; i>-1; i-- ) { 663 n = A->i[i+1] - diag[i] - 1; 664 idx = A->j + diag[i] + !shift; 665 v = A->a + diag[i] + !shift; 666 sum = b[i]; 667 SPARSEDENSEMDOT(sum,xs,v,idx,n); 668 d = fshift + A->a[diag[i]+shift]; 669 n = B->i[i+1] - B->i[i]; 670 idx = B->j + B->i[i] + shift; 671 v = B->a + B->i[i] + shift; 672 SPARSEDENSEMDOT(sum,ls,v,idx,n); 673 x[i] = omega*(sum/d); 674 } 675 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 676 mat->Mvctx); CHKERRQ(ierr); 677 678 /* t = b - (2*E - D)x */ 679 v = A->a; 680 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 681 682 /* t = (E + L)^{-1}t */ 683 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 684 diag = A->diag; 685 VecSet(&zero,mat->lvec); 686 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 687 mat->Mvctx); CHKERRQ(ierr); 688 for ( i=0; i<m; i++ ) { 689 n = diag[i] - A->i[i]; 690 idx = A->j + A->i[i] + shift; 691 v = A->a + A->i[i] + shift; 692 sum = t[i]; 693 SPARSEDENSEMDOT(sum,ts,v,idx,n); 694 d = fshift + A->a[diag[i]+shift]; 695 n = B->i[i+1] - B->i[i]; 696 idx = B->j + B->i[i] + shift; 697 v = B->a + B->i[i] + shift; 698 SPARSEDENSEMDOT(sum,ls,v,idx,n); 699 t[i] = omega*(sum/d); 700 } 701 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 702 mat->Mvctx); CHKERRQ(ierr); 703 /* x = x + t */ 704 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 705 VecDestroy(tt); 706 return 0; 707 } 708 709 710 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 711 if (flag & SOR_ZERO_INITIAL_GUESS) { 712 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 713 } 714 else { 715 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 716 CHKERRQ(ierr); 717 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 718 CHKERRQ(ierr); 719 } 720 while (its--) { 721 /* go down through the rows */ 722 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 723 mat->Mvctx); CHKERRQ(ierr); 724 for ( i=0; i<m; i++ ) { 725 n = A->i[i+1] - 
A->i[i]; 726 idx = A->j + A->i[i] + shift; 727 v = A->a + A->i[i] + shift; 728 sum = b[i]; 729 SPARSEDENSEMDOT(sum,xs,v,idx,n); 730 d = fshift + A->a[diag[i]+shift]; 731 n = B->i[i+1] - B->i[i]; 732 idx = B->j + B->i[i] + shift; 733 v = B->a + B->i[i] + shift; 734 SPARSEDENSEMDOT(sum,ls,v,idx,n); 735 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 736 } 737 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 738 mat->Mvctx); CHKERRQ(ierr); 739 /* come up through the rows */ 740 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 741 mat->Mvctx); CHKERRQ(ierr); 742 for ( i=m-1; i>-1; i-- ) { 743 n = A->i[i+1] - A->i[i]; 744 idx = A->j + A->i[i] + shift; 745 v = A->a + A->i[i] + shift; 746 sum = b[i]; 747 SPARSEDENSEMDOT(sum,xs,v,idx,n); 748 d = fshift + A->a[diag[i]+shift]; 749 n = B->i[i+1] - B->i[i]; 750 idx = B->j + B->i[i] + shift; 751 v = B->a + B->i[i] + shift; 752 SPARSEDENSEMDOT(sum,ls,v,idx,n); 753 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 754 } 755 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 756 mat->Mvctx); CHKERRQ(ierr); 757 } 758 } 759 else if (flag & SOR_FORWARD_SWEEP){ 760 if (flag & SOR_ZERO_INITIAL_GUESS) { 761 VecSet(&zero,mat->lvec); 762 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 763 mat->Mvctx); CHKERRQ(ierr); 764 for ( i=0; i<m; i++ ) { 765 n = diag[i] - A->i[i]; 766 idx = A->j + A->i[i] + shift; 767 v = A->a + A->i[i] + shift; 768 sum = b[i]; 769 SPARSEDENSEMDOT(sum,xs,v,idx,n); 770 d = fshift + A->a[diag[i]+shift]; 771 n = B->i[i+1] - B->i[i]; 772 idx = B->j + B->i[i] + shift; 773 v = B->a + B->i[i] + shift; 774 SPARSEDENSEMDOT(sum,ls,v,idx,n); 775 x[i] = omega*(sum/d); 776 } 777 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 778 mat->Mvctx); CHKERRQ(ierr); 779 its--; 780 } 781 while (its--) { 782 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 783 CHKERRQ(ierr); 784 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 785 CHKERRQ(ierr); 786 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 787 mat->Mvctx); CHKERRQ(ierr); 788 for ( i=0; i<m; i++ ) { 789 n = A->i[i+1] - A->i[i]; 790 idx = A->j + A->i[i] + shift; 791 v = A->a + A->i[i] + shift; 792 sum = b[i]; 793 SPARSEDENSEMDOT(sum,xs,v,idx,n); 794 d = fshift + A->a[diag[i]+shift]; 795 n = B->i[i+1] - B->i[i]; 796 idx = B->j + B->i[i] + shift; 797 v = B->a + B->i[i] + shift; 798 SPARSEDENSEMDOT(sum,ls,v,idx,n); 799 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 800 } 801 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 802 mat->Mvctx); CHKERRQ(ierr); 803 } 804 } 805 else if (flag & SOR_BACKWARD_SWEEP){ 806 if (flag & SOR_ZERO_INITIAL_GUESS) { 807 VecSet(&zero,mat->lvec); 808 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 809 mat->Mvctx); CHKERRQ(ierr); 810 for ( i=m-1; i>-1; i-- ) { 811 n = A->i[i+1] - diag[i] - 1; 812 idx = A->j + diag[i] + !shift; 813 v = A->a + diag[i] + !shift; 814 sum = b[i]; 815 SPARSEDENSEMDOT(sum,xs,v,idx,n); 816 d = fshift + A->a[diag[i]+shift]; 817 n = B->i[i+1] - B->i[i]; 818 idx = B->j + B->i[i] + shift; 819 v = B->a + B->i[i] + shift; 820 SPARSEDENSEMDOT(sum,ls,v,idx,n); 821 x[i] = omega*(sum/d); 822 } 823 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 824 mat->Mvctx); CHKERRQ(ierr); 825 its--; 826 } 827 while (its--) { 828 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 829 mat->Mvctx); CHKERRQ(ierr); 830 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 831 mat->Mvctx); CHKERRQ(ierr); 832 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 833 mat->Mvctx); CHKERRQ(ierr); 834 for ( i=m-1; i>-1; i-- ) { 835 n = A->i[i+1] - A->i[i]; 836 idx = A->j + A->i[i] + shift; 837 v = A->a + A->i[i] + shift; 838 sum = b[i]; 839 SPARSEDENSEMDOT(sum,xs,v,idx,n); 840 d = fshift + A->a[diag[i]+shift]; 841 n = B->i[i+1] - B->i[i]; 842 idx = B->j + B->i[i] + shift; 843 v = B->a + B->i[i] + shift; 844 SPARSEDENSEMDOT(sum,ls,v,idx,n); 845 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 846 } 847 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 848 mat->Mvctx); CHKERRQ(ierr); 849 } 850 } 851 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 852 if (flag & SOR_ZERO_INITIAL_GUESS) { 853 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 854 } 855 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 856 CHKERRQ(ierr); 857 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 858 CHKERRQ(ierr); 859 while (its--) { 860 /* go down through the rows */ 861 for ( i=0; i<m; i++ ) { 862 n = A->i[i+1] - A->i[i]; 863 idx = A->j + A->i[i] + shift; 864 v = A->a + A->i[i] + shift; 865 sum = b[i]; 866 SPARSEDENSEMDOT(sum,xs,v,idx,n); 867 d = fshift + A->a[diag[i]+shift]; 868 n = B->i[i+1] - B->i[i]; 869 idx = B->j + B->i[i] + shift; 870 v = B->a + B->i[i] + shift; 871 SPARSEDENSEMDOT(sum,ls,v,idx,n); 872 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 873 } 874 /* come up through the rows */ 875 for ( i=m-1; i>-1; i-- ) { 876 n = A->i[i+1] - A->i[i]; 877 idx = A->j + A->i[i] + shift; 878 v = A->a + A->i[i] + shift; 879 sum = b[i]; 880 SPARSEDENSEMDOT(sum,xs,v,idx,n); 881 d = fshift + A->a[diag[i]+shift]; 882 n = B->i[i+1] - B->i[i]; 883 idx = B->j + B->i[i] + shift; 884 v = B->a + B->i[i] + shift; 885 SPARSEDENSEMDOT(sum,ls,v,idx,n); 886 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 887 } 888 } 889 } 890 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 891 if (flag & SOR_ZERO_INITIAL_GUESS) { 892 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 893 } 894 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 895 CHKERRQ(ierr); 896 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 897 CHKERRQ(ierr); 898 while (its--) { 899 for ( i=0; i<m; i++ ) { 900 n = A->i[i+1] - A->i[i]; 901 idx = A->j + A->i[i] + shift; 902 v = A->a + A->i[i] + shift; 903 sum = b[i]; 904 SPARSEDENSEMDOT(sum,xs,v,idx,n); 905 d = fshift + A->a[diag[i]+shift]; 906 n = B->i[i+1] - B->i[i]; 907 idx = B->j + B->i[i] + shift; 908 v = B->a + B->i[i] + shift; 909 SPARSEDENSEMDOT(sum,ls,v,idx,n); 910 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 911 } 912 } 913 } 914 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 915 if (flag & SOR_ZERO_INITIAL_GUESS) { 916 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 917 } 918 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 919 mat->Mvctx); CHKERRQ(ierr); 920 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 921 mat->Mvctx); CHKERRQ(ierr); 922 while (its--) { 923 for ( i=m-1; i>-1; i-- ) { 924 n = A->i[i+1] - A->i[i]; 925 idx = A->j + A->i[i] + shift; 926 v = A->a + A->i[i] + shift; 927 sum = b[i]; 928 SPARSEDENSEMDOT(sum,xs,v,idx,n); 929 d = fshift + A->a[diag[i]+shift]; 930 n = B->i[i+1] - B->i[i]; 931 idx = B->j + B->i[i] + shift; 932 v = B->a + B->i[i] + shift; 933 SPARSEDENSEMDOT(sum,ls,v,idx,n); 934 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 935 } 936 } 937 } 938 return 0; 939 } 940 941 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 942 int *nzalloc,int *mem) 943 { 944 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 945 Mat A = mat->A, B = mat->B; 946 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 947 948 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 949 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 950 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 951 if (flag == MAT_LOCAL) { 952 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 953 } else if (flag == MAT_GLOBAL_MAX) { 954 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 955 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 956 } else if (flag == MAT_GLOBAL_SUM) { 957 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 958 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 959 } 960 return 0; 961 } 962 963 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 964 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 965 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 966 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 967 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 968 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 969 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 970 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 971 972 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 973 { 974 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 975 976 if (op == NO_NEW_NONZERO_LOCATIONS || 977 op == YES_NEW_NONZERO_LOCATIONS || 978 op == COLUMNS_SORTED || 979 op == ROW_ORIENTED) { 980 MatSetOption(a->A,op); 981 MatSetOption(a->B,op); 982 } 983 else if (op == ROWS_SORTED || 984 op == SYMMETRIC_MATRIX || 985 op == STRUCTURALLY_SYMMETRIC_MATRIX || 986 op == YES_NEW_DIAGONALS) 987 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 988 else if (op == COLUMN_ORIENTED) 989 
{SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED not supported");} 990 else if (op == NO_NEW_DIAGONALS) 991 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS not supported");} 992 else 993 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:Option not supported");} 994 return 0; 995 } 996 997 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 998 { 999 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1000 *m = mat->M; *n = mat->N; 1001 return 0; 1002 } 1003 1004 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1005 { 1006 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1007 *m = mat->m; *n = mat->N; 1008 return 0; 1009 } 1010 1011 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1012 { 1013 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1014 *m = mat->rstart; *n = mat->rend; 1015 return 0; 1016 } 1017 1018 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1019 { 1020 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1021 Scalar *vworkA, *vworkB, **pvA, **pvB; 1022 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1023 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1024 1025 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows") 1026 lrow = row - rstart; 1027 1028 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1029 if (!v) {pvA = 0; pvB = 0;} 1030 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1031 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1032 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1033 nztot = nzA + nzB; 1034 1035 if (v || idx) { 1036 if (nztot) { 1037 /* Sort by increasing column numbers, assuming A and B already sorted */ 1038 int imark; 1039 if (mat->assembled) { 1040 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1041 } 1042 if (v) { 1043 *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1044 for ( i=0; i<nzB; i++ ) { 1045 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1046 else break; 1047 } 1048 imark = i; 1049 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1050 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1051 } 1052 if (idx) { 1053 *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1054 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1055 for ( i=0; i<nzB; i++ ) { 1056 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1057 else break; 1058 } 1059 imark = i; 1060 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1061 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1062 } 1063 } 1064 else {*idx = 0; *v=0;} 1065 } 1066 *nz = nztot; 1067 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1068 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1069 return 0; 1070 } 1071 1072 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1073 { 1074 if (idx) PETSCFREE(*idx); 1075 if (v) PETSCFREE(*v); 1076 return 0; 1077 } 1078 1079 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1080 { 1081 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1082 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1083 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1084 double sum = 0.0; 1085 Scalar *v; 1086 1087 if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1088 if (aij->numtids == 1) { 1089 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1090 } else { 1091 if (type == NORM_FROBENIUS) { 1092 v = amat->a; 1093 for (i=0; i<amat->nz; i++ ) { 1094 #if defined(PETSC_COMPLEX) 
      sum += real(conj(*v)*(*v)); v++;
#else
      sum += (*v)*(*v); v++;
#endif
    }
    v = bmat->a;
    for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
      sum += real(conj(*v)*(*v)); v++;
#else
      sum += (*v)*(*v); v++;
#endif
    }
    MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
    *norm = sqrt(*norm);
  }
  else if (type == NORM_1) { /* max column norm */
    double *tmp, *tmp2;
    int    *jj, *garray = aij->garray;
    tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
    tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
    PetscZero(tmp,aij->N*sizeof(double));
    *norm = 0.0;
    v = amat->a; jj = amat->j;
    for ( j=0; j<amat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
      tmp[cstart + *jj++ + shift] += abs(*v++);
#else
      tmp[cstart + *jj++ + shift] += fabs(*v++);
#endif
    }
    v = bmat->a; jj = bmat->j;
    for ( j=0; j<bmat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
      tmp[garray[*jj++ + shift]] += abs(*v++);
#else
      tmp[garray[*jj++ + shift]] += fabs(*v++);
#endif
    }
    MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
    for ( j=0; j<aij->N; j++ ) {
      if (tmp2[j] > *norm) *norm = tmp2[j];
    }
    PETSCFREE(tmp); PETSCFREE(tmp2);
  }
  else if (type == NORM_INFINITY) { /* max row norm */
    double normtemp = 0.0;
    for ( j=0; j<amat->m; j++ ) {
      v = amat->a + amat->i[j] + shift;
      sum = 0.0;
      for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += abs(*v); v++;
#else
        sum += fabs(*v); v++;
#endif
      }
      v = bmat->a + bmat->i[j] + shift;
      for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += abs(*v); v++;
#else
        sum += fabs(*v); v++;
#endif
      }
      if (sum > normtemp) normtemp = sum;
    }
    /* take the maximum over all processors */
    MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
  }
  else {
    SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
  }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we free the data structures from a */
    PETSCFREE(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PETSCFREE(a->colmap);
    if (a->garray) PETSCFREE(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
    PETSCFREE(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PETSCHEADERDESTROY(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
       (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
       or null (possibly different for each row). You must leave room
       for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
       submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
       submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage. That is, the stored row and column indices begin at
   one, not zero. See the users manual for further details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
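
   Example usage (a minimal sketch; M, N, row, col, and value are assumed to
   be set by the caller, and the preallocation guesses of 5 and 2 nonzeros
   per row in the diagonal and off-diagonal blocks are only illustrative):

      Mat    A;
      int    ierr, row, col;
      Scalar value;
      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                             5,0,2,0,&A); CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);

   Passing 0 for d_nz and o_nz and null for d_nnz and o_nnz lets PETSc
   control dynamic memory allocation instead.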
1284 1285 Storage Information: 1286 For a square global matrix we define each processor's diagonal portion 1287 to be its local rows and the corresponding columns (a square submatrix); 1288 each processor's off-diagonal portion encompasses the remainder of the 1289 local matrix (a rectangular submatrix). 1290 1291 The user can specify preallocated storage for the diagonal part of 1292 the local submatrix with either d_nz or d_nnz (not both). Set both 1293 d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1294 Likewise, specify preallocated storage for the off-diagonal part of 1295 the local submatrix with o_nz or o_nnz (not both). 1296 1297 .keywords: matrix, aij, compressed row, sparse, parallel 1298 1299 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1300 @*/ 1301 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1302 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1303 { 1304 Mat mat; 1305 Mat_MPIAIJ *a; 1306 int ierr, i,sum[2],work[2]; 1307 1308 *newmat = 0; 1309 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1310 PLogObjectCreate(mat); 1311 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1312 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1313 mat->destroy = MatDestroy_MPIAIJ; 1314 mat->view = MatView_MPIAIJ; 1315 mat->factor = 0; 1316 1317 a->insertmode = NOT_SET_VALUES; 1318 MPI_Comm_rank(comm,&a->mytid); 1319 MPI_Comm_size(comm,&a->numtids); 1320 1321 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 1322 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1323 1324 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1325 work[0] = m; work[1] = n; 1326 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1327 if (M == PETSC_DECIDE) M = sum[0]; 1328 if (N == PETSC_DECIDE) N = sum[1]; 1329 } 1330 if (m == PETSC_DECIDE) {m = M/a->numtids + ((M % a->numtids) > a->mytid);} 1331 if (n == PETSC_DECIDE) {n = N/a->numtids + ((N % a->numtids) > a->mytid);} 1332 a->m = m; 1333 a->n = n; 1334 a->N = N; 1335 a->M = M; 1336 1337 /* build local table of row and column ownerships */ 1338 a->rowners = (int *) PETSCMALLOC(2*(a->numtids+2)*sizeof(int)); CHKPTRQ(a->rowners); 1339 PLogObjectMemory(mat,2*(a->numtids+2)*sizeof(int)+sizeof(struct _Mat)+ 1340 sizeof(Mat_MPIAIJ)); 1341 a->cowners = a->rowners + a->numtids + 1; 1342 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1343 a->rowners[0] = 0; 1344 for ( i=2; i<=a->numtids; i++ ) { 1345 a->rowners[i] += a->rowners[i-1]; 1346 } 1347 a->rstart = a->rowners[a->mytid]; 1348 a->rend = a->rowners[a->mytid+1]; 1349 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1350 a->cowners[0] = 0; 1351 for ( i=2; i<=a->numtids; i++ ) { 1352 a->cowners[i] += a->cowners[i-1]; 1353 } 1354 a->cstart = a->cowners[a->mytid]; 1355 a->cend = a->cowners[a->mytid+1]; 1356 1357 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1358 PLogObjectParent(mat,a->A); 1359 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1360 PLogObjectParent(mat,a->B); 1361 1362 /* build cache for off array entries formed */ 1363 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1364 a->colmap = 0; 1365 a->garray = 0; 1366 1367 /* stuff used for matrix vector multiply */ 1368 a->lvec = 0; 1369 a->Mvctx = 0; 1370 a->assembled = 0; 1371 1372 *newmat = mat; 1373 return 0; 1374 } 1375 1376 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1377 { 1378 Mat mat; 1379 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1380 int ierr, len; 1381 
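
  /* This produces a complete, independent copy of the parallel matrix: the
     layout information and the colmap/garray tables are copied, the local
     blocks A and B are duplicated via MatConvert(...,MATSAME,...), and the
     ghost vector and scatter context are duplicated as well. */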
1382 if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1383 *newmat = 0; 1384 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1385 PLogObjectCreate(mat); 1386 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1387 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1388 mat->destroy = MatDestroy_MPIAIJ; 1389 mat->view = MatView_MPIAIJ; 1390 mat->factor = matin->factor; 1391 1392 a->m = oldmat->m; 1393 a->n = oldmat->n; 1394 a->M = oldmat->M; 1395 a->N = oldmat->N; 1396 1397 a->assembled = 1; 1398 a->rstart = oldmat->rstart; 1399 a->rend = oldmat->rend; 1400 a->cstart = oldmat->cstart; 1401 a->cend = oldmat->cend; 1402 a->numtids = oldmat->numtids; 1403 a->mytid = oldmat->mytid; 1404 a->insertmode = NOT_SET_VALUES; 1405 1406 a->rowners = (int *) PETSCMALLOC((a->numtids+1)*sizeof(int));CHKPTRQ(a->rowners); 1407 PLogObjectMemory(mat,(a->numtids+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1408 PetscMemcpy(a->rowners,oldmat->rowners,(a->numtids+1)*sizeof(int)); 1409 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1410 if (oldmat->colmap) { 1411 a->colmap = (int *) PETSCMALLOC( (a->N)*sizeof(int) );CHKPTRQ(a->colmap); 1412 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1413 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1414 } else a->colmap = 0; 1415 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1416 a->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(a->garray); 1417 PLogObjectMemory(mat,len*sizeof(int)); 1418 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1419 } else a->garray = 0; 1420 1421 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1422 PLogObjectParent(mat,a->lvec); 1423 ierr = VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1424 PLogObjectParent(mat,a->Mvctx); 1425 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1426 PLogObjectParent(mat,a->A); 1427 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1428 PLogObjectParent(mat,a->B); 1429 *newmat = mat; 1430 return 0; 1431 } 1432 1433 #include "sysio.h" 1434 1435 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1436 { 1437 Mat A; 1438 int i, nz, ierr, j,rstart, rend, fd; 1439 Scalar *vals,*svals; 1440 PetscObject vobj = (PetscObject) bview; 1441 MPI_Comm comm = vobj->comm; 1442 MPI_Status status; 1443 int header[4],mytid,numtid,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1444 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1445 int tag = ((PetscObject)bview)->tag; 1446 1447 MPI_Comm_size(comm,&numtid); MPI_Comm_rank(comm,&mytid); 1448 if (!mytid) { 1449 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1450 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1451 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1452 } 1453 1454 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1455 M = header[1]; N = header[2]; 1456 /* determine ownership of all rows */ 1457 m = M/numtid + ((M % numtid) > mytid); 1458 rowners = (int *) PETSCMALLOC((numtid+2)*sizeof(int)); CHKPTRQ(rowners); 1459 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1460 rowners[0] = 0; 1461 for ( i=2; i<=numtid; i++ ) { 1462 rowners[i] += rowners[i-1]; 1463 } 1464 rstart = rowners[mytid]; 1465 rend = rowners[mytid+1]; 1466 1467 /* distribute row lengths to all processors */ 1468 ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1469 offlens = ourlens + (rend-rstart); 1470 if (!mytid) { 1471 
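    /* only the first processor has the file open; it reads all the row
       lengths and scatters to each processor its own share */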
rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths); 1472 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1473 sndcounts = (int*) PETSCMALLOC( numtid*sizeof(int) ); CHKPTRQ(sndcounts); 1474 for ( i=0; i<numtid; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1475 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1476 PETSCFREE(sndcounts); 1477 } 1478 else { 1479 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1480 } 1481 1482 if (!mytid) { 1483 /* calculate the number of nonzeros on each processor */ 1484 procsnz = (int*) PETSCMALLOC( numtid*sizeof(int) ); CHKPTRQ(procsnz); 1485 PetscZero(procsnz,numtid*sizeof(int)); 1486 for ( i=0; i<numtid; i++ ) { 1487 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1488 procsnz[i] += rowlengths[j]; 1489 } 1490 } 1491 PETSCFREE(rowlengths); 1492 1493 /* determine max buffer needed and allocate it */ 1494 maxnz = 0; 1495 for ( i=0; i<numtid; i++ ) { 1496 maxnz = PETSCMAX(maxnz,procsnz[i]); 1497 } 1498 cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols); 1499 1500 /* read in my part of the matrix column indices */ 1501 nz = procsnz[0]; 1502 mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1503 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1504 1505 /* read in every one elses and ship off */ 1506 for ( i=1; i<numtid; i++ ) { 1507 nz = procsnz[i]; 1508 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1509 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1510 } 1511 PETSCFREE(cols); 1512 } 1513 else { 1514 /* determine buffer space needed for message */ 1515 nz = 0; 1516 for ( i=0; i<m; i++ ) { 1517 nz += ourlens[i]; 1518 } 1519 mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1520 1521 /* receive message of column indices*/ 1522 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1523 MPI_Get_count(&status,MPI_INT,&maxnz); 1524 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1525 } 1526 1527 /* loop over local rows, determining number of off diagonal entries */ 1528 PetscZero(offlens,m*sizeof(int)); 1529 jj = 0; 1530 for ( i=0; i<m; i++ ) { 1531 for ( j=0; j<ourlens[i]; j++ ) { 1532 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1533 jj++; 1534 } 1535 } 1536 1537 1538 /* create our matrix */ 1539 for ( i=0; i<m; i++ ) { 1540 ourlens[i] -= offlens[i]; 1541 } 1542 if (type == MATMPIAIJ) { 1543 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1544 } 1545 else if (type == MATMPIROW) { 1546 ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1547 } 1548 A = *newmat; 1549 MatSetOption(A,COLUMNS_SORTED); 1550 for ( i=0; i<m; i++ ) { 1551 ourlens[i] += offlens[i]; 1552 } 1553 1554 if (!mytid) { 1555 vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1556 1557 /* read in my part of the matrix numerical values */ 1558 nz = procsnz[0]; 1559 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1560 1561 /* insert into matrix */ 1562 jj = rstart; 1563 smycols = mycols; 1564 svals = vals; 1565 for ( i=0; i<m; i++ ) { 1566 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1567 smycols += ourlens[i]; 1568 svals += ourlens[i]; 1569 jj++; 1570 } 1571 1572 /* read in other processors and ship out */ 1573 for ( i=1; i<numtid; i++ ) { 1574 nz = procsnz[i]; 1575 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1576 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1577 } 1578 PETSCFREE(procsnz); 1579 } 
1580 else { 1581 /* receive numeric values */ 1582 vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1583 1584 /* receive message of values*/ 1585 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1586 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1587 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1588 1589 /* insert into matrix */ 1590 jj = rstart; 1591 smycols = mycols; 1592 svals = vals; 1593 for ( i=0; i<m; i++ ) { 1594 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1595 smycols += ourlens[i]; 1596 svals += ourlens[i]; 1597 jj++; 1598 } 1599 } 1600 PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners); 1601 1602 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1603 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1604 return 0; 1605 } 1606