#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.174 1996/11/19 16:31:04 bsmith Exp curfman $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr
= StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 104 } 105 else { 106 if (!aij->donotstash) { 107 row = im[i]; 108 for ( j=0; j<n; j++ ) { 109 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 110 } 111 } 112 } 113 } 114 } 115 return 0; 116 } 117 118 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 119 { 120 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 121 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 122 int cstart = aij->cstart, cend = aij->cend,row,col; 123 124 for ( i=0; i<m; i++ ) { 125 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 126 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 127 if (idxm[i] >= rstart && idxm[i] < rend) { 128 row = idxm[i] - rstart; 129 for ( j=0; j<n; j++ ) { 130 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 131 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 132 if (idxn[j] >= cstart && idxn[j] < cend){ 133 col = idxn[j] - cstart; 134 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 135 } 136 else { 137 if (!aij->colmap) { 138 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 139 } 140 col = aij->colmap[idxn[j]] - 1; 141 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 142 else { 143 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 144 } 145 } 146 } 147 } 148 else { 149 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 150 } 151 } 152 return 0; 153 } 154 155 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 156 { 157 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 158 MPI_Comm comm = mat->comm; 159 int size = aij->size, *owners = aij->rowners; 160 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 161 MPI_Request *send_waits,*recv_waits; 162 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 163 InsertMode addv; 164 Scalar *rvalues,*svalues; 165 166 /* make sure all processors are either in INSERTMODE or ADDMODE */ 167 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 168 if (addv == (ADD_VALUES|INSERT_VALUES)) { 169 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 170 } 171 aij->insertmode = addv; /* in case this processor had no cache */ 172 173 /* first count number of contributors to each processor */ 174 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 175 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 176 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 177 for ( i=0; i<aij->stash.n; i++ ) { 178 idx = aij->stash.idx[i]; 179 for ( j=0; j<size; j++ ) { 180 if (idx >= owners[j] && idx < owners[j+1]) { 181 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 182 } 183 } 184 } 185 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 186 187 /* inform other processors of number of messages and max length*/ 188 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 189 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 190 nreceives = work[rank]; 191 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 192 nmax = work[rank]; 193 PetscFree(work); 194 195 /* post receives: 196 1) each message will consist of ordered pairs 197 (global index,value) we store the global index as a double 198 to simplify the message passing. 199 2) since we don't know how long each individual message is we 200 allocate the largest needed buffer for each receive. 
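         (As an illustrative example of the packing used below: each stashed
         entry travels as three consecutive Scalars - global row, global
         column, value - so the entry (row 7, column 12, value 3.0) is sent
         as {7.0, 12.0, 3.0}, and the receiver in MatAssemblyEnd casts the
         first two back to ints to recover the indices.)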
Potentially 201 this is a lot of wasted space. 202 203 204 This could be done better. 205 */ 206 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 207 CHKPTRQ(rvalues); 208 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 209 CHKPTRQ(recv_waits); 210 for ( i=0; i<nreceives; i++ ) { 211 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 212 comm,recv_waits+i); 213 } 214 215 /* do sends: 216 1) starts[i] gives the starting index in svalues for stuff going to 217 the ith processor 218 */ 219 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 220 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 221 CHKPTRQ(send_waits); 222 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 223 starts[0] = 0; 224 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 225 for ( i=0; i<aij->stash.n; i++ ) { 226 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 227 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 228 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 229 } 230 PetscFree(owner); 231 starts[0] = 0; 232 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 233 count = 0; 234 for ( i=0; i<size; i++ ) { 235 if (procs[i]) { 236 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 237 comm,send_waits+count++); 238 } 239 } 240 PetscFree(starts); PetscFree(nprocs); 241 242 /* Free cache space */ 243 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 244 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 245 246 aij->svalues = svalues; aij->rvalues = rvalues; 247 aij->nsends = nsends; aij->nrecvs = nreceives; 248 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 249 aij->rmax = nmax; 250 251 return 0; 252 } 253 extern int MatSetUpMultiply_MPIAIJ(Mat); 254 255 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 256 { 257 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 258 MPI_Status *send_status,recv_status; 259 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 260 int row,col,other_disassembled; 261 Scalar *values,val; 262 InsertMode addv = aij->insertmode; 263 264 /* wait on receives */ 265 while (count) { 266 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 267 /* unpack receives into our local space */ 268 values = aij->rvalues + 3*imdex*aij->rmax; 269 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 270 n = n/3; 271 for ( i=0; i<n; i++ ) { 272 row = (int) PetscReal(values[3*i]) - aij->rstart; 273 col = (int) PetscReal(values[3*i+1]); 274 val = values[3*i+2]; 275 if (col >= aij->cstart && col < aij->cend) { 276 col -= aij->cstart; 277 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 278 } 279 else { 280 if (mat->was_assembled || mat->assembled) { 281 if (!aij->colmap) { 282 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 283 } 284 col = aij->colmap[col] - 1; 285 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 col = (int) PetscReal(values[3*i+1]); 288 } 289 } 290 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 291 } 292 } 293 count--; 294 } 295 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 296 297 /* wait on sends */ 298 if (aij->nsends) { 299 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 300 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 301 PetscFree(send_status); 302 } 303 
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *)
               PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr =
(*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 502 /* receive remote parts: note this assumes the values are not actually */ 503 /* inserted in yy until the next line, which is true for my implementation*/ 504 /* but is not perhaps always true. */ 505 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 506 return 0; 507 } 508 509 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 510 { 511 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512 int ierr; 513 514 /* do nondiagonal part */ 515 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 516 /* send it on its way */ 517 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 518 /* do local part */ 519 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 520 /* receive remote parts: note this assumes the values are not actually */ 521 /* inserted in yy until the next line, which is true for my implementation*/ 522 /* but is not perhaps always true. */ 523 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 524 return 0; 525 } 526 527 /* 528 This only works correctly for square matrices where the subblock A->A is the 529 diagonal block 530 */ 531 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 532 { 533 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 534 if (a->M != a->N) 535 SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 536 if (a->rstart != a->cstart || a->rend != a->cend) { 537 SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); } 538 return MatGetDiagonal(a->A,v); 539 } 540 541 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 542 { 543 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 544 int ierr; 545 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 546 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 547 return 0; 548 } 549 550 static int MatDestroy_MPIAIJ(PetscObject obj) 551 { 552 Mat mat = (Mat) obj; 553 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 554 int ierr; 555 #if defined(PETSC_LOG) 556 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 557 #endif 558 PetscFree(aij->rowners); 559 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 560 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 561 if (aij->colmap) PetscFree(aij->colmap); 562 if (aij->garray) PetscFree(aij->garray); 563 if (aij->lvec) VecDestroy(aij->lvec); 564 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 565 if (aij->rowvalues) PetscFree(aij->rowvalues); 566 PetscFree(aij); 567 if (mat->mapping) { 568 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 569 } 570 PLogObjectDestroy(mat); 571 PetscHeaderDestroy(mat); 572 return 0; 573 } 574 #include "draw.h" 575 #include "pinclude/pviewer.h" 576 577 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 578 { 579 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 580 int ierr; 581 582 if (aij->size == 1) { 583 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 584 } 585 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 586 return 0; 587 } 588 589 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 590 { 591 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 592 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 593 int ierr, format,shift = C->indexshift,rank; 594 FILE *fd; 595 ViewerType vtype; 596 597 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 598 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 599 ierr = ViewerGetFormat(viewer,&format); 600 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 601 MatInfo info; 602 int flg; 603 
MPI_Comm_rank(mat->comm,&rank); 604 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 605 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 606 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 607 PetscSequentialPhaseBegin(mat->comm,1); 608 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 609 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 610 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 611 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 612 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 613 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 614 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 615 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 616 fflush(fd); 617 PetscSequentialPhaseEnd(mat->comm,1); 618 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 619 return 0; 620 } 621 else if (format == VIEWER_FORMAT_ASCII_INFO) { 622 return 0; 623 } 624 } 625 626 if (vtype == DRAW_VIEWER) { 627 Draw draw; 628 PetscTruth isnull; 629 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 630 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 631 } 632 633 if (vtype == ASCII_FILE_VIEWER) { 634 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 635 PetscSequentialPhaseBegin(mat->comm,1); 636 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 637 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 638 aij->cend); 639 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 640 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 641 fflush(fd); 642 PetscSequentialPhaseEnd(mat->comm,1); 643 } 644 else { 645 int size = aij->size; 646 rank = aij->rank; 647 if (size == 1) { 648 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 649 } 650 else { 651 /* assemble the entire matrix onto first processor. 
      */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

    2) a) use only local smoothing, updating outer values only once
       b) local smoothing, updating outer values each inner iteration
    3) color, updating outer values between colors
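
    A sketch of the update the local sweeps below actually perform for each
    owned row i (assuming the SPARSEDENSEMDOT(sum,x,v,idx,n) macro from
    spops.h accumulates sum -= v[k]*x[idx[k]]):

        sum  = b[i] - (row i of A)*x_local - (row i of B)*x_ghost
        d    = fshift + a_ii
        x[i] = (1-omega)*x[i] + omega*(sum + a_ii*x[i])/d

    so off-processor values enter only through the ghost values gathered
    into mat->lvec by the VecScatter at the start of the sweep.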
724 */ 725 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 726 double fshift,int its,Vec xx) 727 { 728 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 729 Mat AA = mat->A, BB = mat->B; 730 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 731 Scalar *b,*x,*xs,*ls,d,*v,sum; 732 int ierr,*idx, *diag; 733 int n = mat->n, m = mat->m, i,shift = A->indexshift; 734 735 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 736 xs = x + shift; /* shift by one for index start of 1 */ 737 ls = ls + shift; 738 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 739 diag = A->diag; 740 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 741 if (flag & SOR_ZERO_INITIAL_GUESS) { 742 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 743 } 744 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 745 CHKERRQ(ierr); 746 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 747 CHKERRQ(ierr); 748 while (its--) { 749 /* go down through the rows */ 750 for ( i=0; i<m; i++ ) { 751 n = A->i[i+1] - A->i[i]; 752 idx = A->j + A->i[i] + shift; 753 v = A->a + A->i[i] + shift; 754 sum = b[i]; 755 SPARSEDENSEMDOT(sum,xs,v,idx,n); 756 d = fshift + A->a[diag[i]+shift]; 757 n = B->i[i+1] - B->i[i]; 758 idx = B->j + B->i[i] + shift; 759 v = B->a + B->i[i] + shift; 760 SPARSEDENSEMDOT(sum,ls,v,idx,n); 761 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 762 } 763 /* come up through the rows */ 764 for ( i=m-1; i>-1; i-- ) { 765 n = A->i[i+1] - A->i[i]; 766 idx = A->j + A->i[i] + shift; 767 v = A->a + A->i[i] + shift; 768 sum = b[i]; 769 SPARSEDENSEMDOT(sum,xs,v,idx,n); 770 d = fshift + A->a[diag[i]+shift]; 771 n = B->i[i+1] - B->i[i]; 772 idx = B->j + B->i[i] + shift; 773 v = B->a + B->i[i] + shift; 774 SPARSEDENSEMDOT(sum,ls,v,idx,n); 775 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 776 } 777 } 778 } 779 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 780 if (flag & SOR_ZERO_INITIAL_GUESS) { 781 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 782 } 783 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 784 CHKERRQ(ierr); 785 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 786 CHKERRQ(ierr); 787 while (its--) { 788 for ( i=0; i<m; i++ ) { 789 n = A->i[i+1] - A->i[i]; 790 idx = A->j + A->i[i] + shift; 791 v = A->a + A->i[i] + shift; 792 sum = b[i]; 793 SPARSEDENSEMDOT(sum,xs,v,idx,n); 794 d = fshift + A->a[diag[i]+shift]; 795 n = B->i[i+1] - B->i[i]; 796 idx = B->j + B->i[i] + shift; 797 v = B->a + B->i[i] + shift; 798 SPARSEDENSEMDOT(sum,ls,v,idx,n); 799 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 800 } 801 } 802 } 803 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 804 if (flag & SOR_ZERO_INITIAL_GUESS) { 805 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 806 } 807 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 808 mat->Mvctx); CHKERRQ(ierr); 809 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 810 mat->Mvctx); CHKERRQ(ierr); 811 while (its--) { 812 for ( i=m-1; i>-1; i-- ) { 813 n = A->i[i+1] - A->i[i]; 814 idx = A->j + A->i[i] + shift; 815 v = A->a + A->i[i] + shift; 816 sum = b[i]; 817 SPARSEDENSEMDOT(sum,xs,v,idx,n); 818 d = fshift + A->a[diag[i]+shift]; 819 n = B->i[i+1] - B->i[i]; 820 idx = B->j + B->i[i] + shift; 821 v = B->a + B->i[i] + shift; 822 SPARSEDENSEMDOT(sum,ls,v,idx,n); 823 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 824 } 825 } 826 } 827 else { 828 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 829 } 830 return 0; 831 } 832 833 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 834 { 835 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 836 Mat A = mat->A, B = mat->B; 837 int ierr; 838 double isend[5], irecv[5]; 839 840 info->rows_global = (double)mat->M; 841 info->columns_global = (double)mat->N; 842 info->rows_local = (double)mat->m; 843 info->columns_local = (double)mat->N; 844 info->block_size = 1.0; 845 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 846 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 847 isend[3] = info->memory; isend[4] = info->mallocs; 848 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 849 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 850 isend[3] += info->memory; isend[4] += info->mallocs; 851 if (flag == MAT_LOCAL) { 852 info->nz_used = isend[0]; 853 info->nz_allocated = isend[1]; 854 info->nz_unneeded = isend[2]; 855 info->memory = isend[3]; 856 info->mallocs = isend[4]; 857 } else if (flag == MAT_GLOBAL_MAX) { 858 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 859 info->nz_used = irecv[0]; 860 info->nz_allocated = irecv[1]; 861 info->nz_unneeded = irecv[2]; 862 info->memory = irecv[3]; 863 info->mallocs = irecv[4]; 864 } else if (flag == MAT_GLOBAL_SUM) { 865 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 866 info->nz_used = irecv[0]; 867 info->nz_allocated = irecv[1]; 868 info->nz_unneeded = irecv[2]; 869 info->memory = irecv[3]; 870 info->mallocs = irecv[4]; 871 } 872 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 873 info->fill_ratio_needed = 0; 874 info->factor_mallocs = 0; 875 876 return 0; 877 } 878 879 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 880 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 881 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 882 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 883 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 884 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 885 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 886 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 887 888 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 889 { 890 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 891 892 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 893 op == MAT_YES_NEW_NONZERO_LOCATIONS || 894 op == MAT_COLUMNS_SORTED || 895 op == MAT_ROW_ORIENTED) { 896 a->roworiented = 1; 897 MatSetOption(a->A,op); 898 MatSetOption(a->B,op); 899 } 900 else if (op == MAT_ROWS_SORTED || 901 op == MAT_SYMMETRIC || 902 op == MAT_STRUCTURALLY_SYMMETRIC || 903 op == MAT_YES_NEW_DIAGONALS) 904 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 905 else if (op == MAT_COLUMN_ORIENTED) { 906 a->roworiented = 0; 907 MatSetOption(a->A,op); 908 MatSetOption(a->B,op); 909 } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 910 a->donotstash = 1; 911 } else if (op == MAT_NO_NEW_DIAGONALS) 912 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 913 else 914 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 915 return 0; 916 } 917 918 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 919 { 920 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 921 *m = mat->M; *n = mat->N; 922 return 0; 923 } 924 925 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 926 { 927 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) 
matin->data; 928 *m = mat->m; *n = mat->N; 929 return 0; 930 } 931 932 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 933 { 934 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 935 *m = mat->rstart; *n = mat->rend; 936 return 0; 937 } 938 939 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 940 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 941 942 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 943 { 944 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 945 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 946 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 947 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 948 int *cmap, *idx_p; 949 950 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 951 mat->getrowactive = PETSC_TRUE; 952 953 if (!mat->rowvalues && (idx || v)) { 954 /* 955 allocate enough space to hold information from the longest row. 956 */ 957 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 958 int max = 1,m = mat->m,tmp; 959 for ( i=0; i<m; i++ ) { 960 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 961 if (max < tmp) { max = tmp; } 962 } 963 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 964 CHKPTRQ(mat->rowvalues); 965 mat->rowindices = (int *) (mat->rowvalues + max); 966 } 967 968 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 969 lrow = row - rstart; 970 971 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 972 if (!v) {pvA = 0; pvB = 0;} 973 if (!idx) {pcA = 0; if (!v) pcB = 0;} 974 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 975 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 976 nztot = nzA + nzB; 977 978 cmap = mat->garray; 979 if (v || idx) { 980 if (nztot) { 981 /* Sort by increasing column numbers, assuming A and B already sorted */ 982 int imark = -1; 983 if (v) { 984 *v = v_p = mat->rowvalues; 985 for ( i=0; i<nzB; i++ ) { 986 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 987 else break; 988 } 989 imark = i; 990 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 991 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 992 } 993 if (idx) { 994 *idx = idx_p = mat->rowindices; 995 if (imark > -1) { 996 for ( i=0; i<imark; i++ ) { 997 idx_p[i] = cmap[cworkB[i]]; 998 } 999 } else { 1000 for ( i=0; i<nzB; i++ ) { 1001 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1002 else break; 1003 } 1004 imark = i; 1005 } 1006 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1007 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1008 } 1009 } 1010 else { 1011 if (idx) *idx = 0; 1012 if (v) *v = 0; 1013 } 1014 } 1015 *nz = nztot; 1016 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1017 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1018 return 0; 1019 } 1020 1021 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1022 { 1023 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1024 if (aij->getrowactive == PETSC_FALSE) { 1025 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1026 } 1027 aij->getrowactive = PETSC_FALSE; 1028 return 0; 1029 } 1030 1031 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1032 { 1033 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1034 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1035 int ierr, i, j, cstart = aij->cstart,shift = 
amat->indexshift; 1036 double sum = 0.0; 1037 Scalar *v; 1038 1039 if (aij->size == 1) { 1040 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1041 } else { 1042 if (type == NORM_FROBENIUS) { 1043 v = amat->a; 1044 for (i=0; i<amat->nz; i++ ) { 1045 #if defined(PETSC_COMPLEX) 1046 sum += real(conj(*v)*(*v)); v++; 1047 #else 1048 sum += (*v)*(*v); v++; 1049 #endif 1050 } 1051 v = bmat->a; 1052 for (i=0; i<bmat->nz; i++ ) { 1053 #if defined(PETSC_COMPLEX) 1054 sum += real(conj(*v)*(*v)); v++; 1055 #else 1056 sum += (*v)*(*v); v++; 1057 #endif 1058 } 1059 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1060 *norm = sqrt(*norm); 1061 } 1062 else if (type == NORM_1) { /* max column norm */ 1063 double *tmp, *tmp2; 1064 int *jj, *garray = aij->garray; 1065 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1066 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1067 PetscMemzero(tmp,aij->N*sizeof(double)); 1068 *norm = 0.0; 1069 v = amat->a; jj = amat->j; 1070 for ( j=0; j<amat->nz; j++ ) { 1071 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1072 } 1073 v = bmat->a; jj = bmat->j; 1074 for ( j=0; j<bmat->nz; j++ ) { 1075 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1076 } 1077 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1078 for ( j=0; j<aij->N; j++ ) { 1079 if (tmp2[j] > *norm) *norm = tmp2[j]; 1080 } 1081 PetscFree(tmp); PetscFree(tmp2); 1082 } 1083 else if (type == NORM_INFINITY) { /* max row norm */ 1084 double ntemp = 0.0; 1085 for ( j=0; j<amat->m; j++ ) { 1086 v = amat->a + amat->i[j] + shift; 1087 sum = 0.0; 1088 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1089 sum += PetscAbsScalar(*v); v++; 1090 } 1091 v = bmat->a + bmat->i[j] + shift; 1092 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1093 sum += PetscAbsScalar(*v); v++; 1094 } 1095 if (sum > ntemp) ntemp = sum; 1096 } 1097 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1098 } 1099 else { 1100 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1101 } 1102 } 1103 return 0; 1104 } 1105 1106 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1107 { 1108 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1109 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1110 int ierr,shift = Aloc->indexshift; 1111 Mat B; 1112 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1113 Scalar *array; 1114 1115 if (matout == PETSC_NULL && M != N) 1116 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1117 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1118 PETSC_NULL,&B); CHKERRQ(ierr); 1119 1120 /* copy over the A part */ 1121 Aloc = (Mat_SeqAIJ*) a->A->data; 1122 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1123 row = a->rstart; 1124 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1125 for ( i=0; i<m; i++ ) { 1126 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1127 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1128 } 1129 aj = Aloc->j; 1130 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1131 1132 /* copy over the B part */ 1133 Aloc = (Mat_SeqAIJ*) a->B->data; 1134 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1135 row = a->rstart; 1136 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1137 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1138 for ( i=0; i<m; i++ ) { 1139 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1140 row++; array += ai[i+1]-ai[i]; 
cols += ai[i+1]-ai[i]; 1141 } 1142 PetscFree(ct); 1143 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1144 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1145 if (matout != PETSC_NULL) { 1146 *matout = B; 1147 } else { 1148 /* This isn't really an in-place transpose .... but free data structures from a */ 1149 PetscFree(a->rowners); 1150 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1151 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1152 if (a->colmap) PetscFree(a->colmap); 1153 if (a->garray) PetscFree(a->garray); 1154 if (a->lvec) VecDestroy(a->lvec); 1155 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1156 PetscFree(a); 1157 PetscMemcpy(A,B,sizeof(struct _Mat)); 1158 PetscHeaderDestroy(B); 1159 } 1160 return 0; 1161 } 1162 1163 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1166 Mat a = aij->A, b = aij->B; 1167 int ierr,s1,s2,s3; 1168 1169 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1170 if (rr) { 1171 s3 = aij->n; 1172 VecGetLocalSize_Fast(rr,s1); 1173 if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size"); 1174 /* Overlap communication with computation. */ 1175 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr); 1176 } 1177 if (ll) { 1178 VecGetLocalSize_Fast(ll,s1); 1179 if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size"); 1180 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1181 } 1182 /* scale the diagonal block */ 1183 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1184 1185 if (rr) { 1186 /* Do a scatter end and then right scale the off-diagonal block */ 1187 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr); 1188 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1189 } 1190 1191 return 0; 1192 } 1193 1194 1195 extern int MatPrintHelp_SeqAIJ(Mat); 1196 static int MatPrintHelp_MPIAIJ(Mat A) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1199 1200 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1201 else return 0; 1202 } 1203 1204 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1205 { 1206 *bs = 1; 1207 return 0; 1208 } 1209 1210 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1211 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1212 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1213 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1214 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1215 /* -------------------------------------------------------------------*/ 1216 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1217 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1218 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1219 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1220 0,0, 1221 0,0, 1222 0,0, 1223 MatRelax_MPIAIJ, 1224 MatTranspose_MPIAIJ, 1225 MatGetInfo_MPIAIJ,0, 1226 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1227 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1228 0, 1229 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1230 0,0,0,0, 1231 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1232 0,0, 1233 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1234 0,0,0, 1235 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1236 MatPrintHelp_MPIAIJ, 1237 MatScale_MPIAIJ,0,0,0, 1238 MatGetBlockSize_MPIAIJ,0,0,0,0, 1239 MatFDColoringCreate_MPIAIJ}; 1240 1241 1242 /*@C 1243 MatCreateMPIAIJ - Creates a sparse 
   parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  |  o  o  o  d  d  d  o  o  o  o  o  o
$    row 4  |  o  o  o  d  d  d  o  o  o  o  o  o
$    row 5  |  o  o  o  d  d  d  o  o  o  o  o  o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
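
  /* Illustrative example of the ownership calculation above: with M = 10
     global rows on size = 3 processes and m = PETSC_DECIDE, the formula
     m = M/size + ((M % size) > rank) gives local sizes 4, 3 and 3, so the
     Allgather and running sum produce rowners = {0,4,7,10}; process 1 then
     owns rows 4 through 6 (rstart = 4, rend = 7). */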
MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1398 b->cowners[0] = 0; 1399 for ( i=2; i<=b->size; i++ ) { 1400 b->cowners[i] += b->cowners[i-1]; 1401 } 1402 b->cstart = b->cowners[b->rank]; 1403 b->cend = b->cowners[b->rank+1]; 1404 1405 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1406 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1407 PLogObjectParent(B,b->A); 1408 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1409 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1410 PLogObjectParent(B,b->B); 1411 1412 /* build cache for off array entries formed */ 1413 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1414 b->donotstash = 0; 1415 b->colmap = 0; 1416 b->garray = 0; 1417 b->roworiented = 1; 1418 1419 /* stuff used for matrix vector multiply */ 1420 b->lvec = 0; 1421 b->Mvctx = 0; 1422 1423 /* stuff for MatGetRow() */ 1424 b->rowindices = 0; 1425 b->rowvalues = 0; 1426 b->getrowactive = PETSC_FALSE; 1427 1428 *A = B; 1429 return 0; 1430 } 1431 1432 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1433 { 1434 Mat mat; 1435 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1436 int ierr, len=0, flg; 1437 1438 *newmat = 0; 1439 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1440 PLogObjectCreate(mat); 1441 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1442 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1443 mat->destroy = MatDestroy_MPIAIJ; 1444 mat->view = MatView_MPIAIJ; 1445 mat->factor = matin->factor; 1446 mat->assembled = PETSC_TRUE; 1447 1448 a->m = mat->m = oldmat->m; 1449 a->n = mat->n = oldmat->n; 1450 a->M = mat->M = oldmat->M; 1451 a->N = mat->N = oldmat->N; 1452 1453 a->rstart = oldmat->rstart; 1454 a->rend = oldmat->rend; 1455 a->cstart = oldmat->cstart; 1456 a->cend = oldmat->cend; 1457 a->size = oldmat->size; 1458 a->rank = oldmat->rank; 1459 a->insertmode = NOT_SET_VALUES; 1460 a->rowvalues = 0; 1461 a->getrowactive = PETSC_FALSE; 1462 1463 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1464 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1465 a->cowners = a->rowners + a->size + 2; 1466 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1467 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1468 if (oldmat->colmap) { 1469 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1470 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1471 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1472 } else a->colmap = 0; 1473 if (oldmat->garray) { 1474 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1475 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1476 PLogObjectMemory(mat,len*sizeof(int)); 1477 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1478 } else a->garray = 0; 1479 1480 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1481 PLogObjectParent(mat,a->lvec); 1482 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1483 PLogObjectParent(mat,a->Mvctx); 1484 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1485 PLogObjectParent(mat,a->A); 1486 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1487 PLogObjectParent(mat,a->B); 1488 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1489 if (flg) { 1490 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1491 } 1492 *newmat = mat; 1493 return 0; 1494 } 1495 1496 #include "sys.h" 1497 1498 int MatLoad_MPIAIJ(Viewer 
                   viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0;
j<ourlens[i]; j++ ) { 1594 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1595 jj++; 1596 } 1597 } 1598 1599 /* create our matrix */ 1600 for ( i=0; i<m; i++ ) { 1601 ourlens[i] -= offlens[i]; 1602 } 1603 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1604 A = *newmat; 1605 MatSetOption(A,MAT_COLUMNS_SORTED); 1606 for ( i=0; i<m; i++ ) { 1607 ourlens[i] += offlens[i]; 1608 } 1609 1610 if (!rank) { 1611 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1612 1613 /* read in my part of the matrix numerical values */ 1614 nz = procsnz[0]; 1615 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1616 1617 /* insert into matrix */ 1618 jj = rstart; 1619 smycols = mycols; 1620 svals = vals; 1621 for ( i=0; i<m; i++ ) { 1622 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1623 smycols += ourlens[i]; 1624 svals += ourlens[i]; 1625 jj++; 1626 } 1627 1628 /* read in other processors and ship out */ 1629 for ( i=1; i<size; i++ ) { 1630 nz = procsnz[i]; 1631 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1632 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1633 } 1634 PetscFree(procsnz); 1635 } 1636 else { 1637 /* receive numeric values */ 1638 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1639 1640 /* receive message of values*/ 1641 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1642 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1643 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1644 1645 /* insert into matrix */ 1646 jj = rstart; 1647 smycols = mycols; 1648 svals = vals; 1649 for ( i=0; i<m; i++ ) { 1650 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1651 smycols += ourlens[i]; 1652 svals += ourlens[i]; 1653 jj++; 1654 } 1655 } 1656 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1657 1658 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1659 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1660 return 0; 1661 } 1662
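
/*
   Example usage (an illustrative sketch only; the communicator, sizes and
   entries below are invented for the example, and error handling is the
   usual CHKERRQ pattern).  Assembling a 100 x 100 tridiagonal MPIAIJ matrix;
   entries whose columns lie off this processor's diagonal block go into the
   B submatrix, and entries for rows owned by other processors are stashed
   by MatSetValues_MPIAIJ() and shipped during assembly:

      Mat    A;
      int    ierr, rstart, rend, i, ncols, cols[3];
      Scalar vals[3];

      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             3,PETSC_NULL,1,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ncols = 0;
        if (i > 0)  {cols[ncols] = i-1; vals[ncols] = -1.0; ncols++;}
        cols[ncols] = i; vals[ncols] = 2.0; ncols++;
        if (i < 99) {cols[ncols] = i+1; vals[ncols] = -1.0; ncols++;}
        ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/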