#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.172 1996/11/01 23:25:33 balay Exp balay $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}
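
/*
   Convention used by colmap: for a global column number gcol, colmap[gcol]
   is 1 + the local column number of gcol in the off-diagonal block B, and 0
   if gcol currently has no entries there (which is why the table can simply
   be zeroed above).  A typical lookup therefore reads

       col = aij->colmap[gcol] - 1;
       if (col < 0) {
         ... gcol is not (yet) represented in B ...
       }

   (illustrative fragment only; gcol stands for any global column index).
*/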
extern int DisAssemble_MPIAIJ(Mat);

static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}
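
/*
   Typical usage from the caller's side (illustrative sketch): any process
   may set an entry in any row of the parallel matrix,

       ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES); CHKERRQ(ierr);
       ...
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

   If row i lies outside [rstart,rend) on the calling process, the entry is
   cached in the stash by the routine above and delivered to its owner
   during the assembly phase below.
*/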
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
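
/*
   Layout of the messages built above: each stashed entry is packed as three
   consecutive Scalars,

       svalues[3*k]   = (Scalar) global row index
       svalues[3*k+1] = (Scalar) global column index
       svalues[3*k+2] = numerical value

   so that one MPIU_SCALAR message per destination suffices; the indices are
   converted back to integers with a cast in the unpacking loop of
   MatAssemblyEnd_MPIAIJ() below.
*/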
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
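
/*
   The multiply routines that follow all use the same splitting of the local
   work,

       y_local = A*x_local + B*x_ghost

   where A is the diagonal block (columns owned by this process) and B is the
   off-diagonal block, whose columns are the compressed set of off-process
   columns listed in garray.  The scatter that gathers the needed ghost
   entries of x into lvec is begun before the product with A and completed
   just before the product with B, so the communication is overlapped with
   the local computation.
*/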
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition"); }
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else     fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
   This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration.
     3) color, updating outer values between colors.
*/
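
/*
   Each local sweep below applies the usual SOR update, restricted to the
   locally owned rows,

       x_i <- (1 - omega)*x_i
              + omega*(b_i - sum_{j!=i} a_ij*x_j)/(fshift + a_ii)

   with the row sum accumulated in two pieces: the diagonal block A using the
   local values of x, and the off-diagonal block B using the ghost values
   previously scattered into lvec.  The ghost values are refreshed once per
   call, not once per inner iteration.
*/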
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  return 0;
}
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}
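
/*
   Note on the ordering produced by MatGetRow_MPIAIJ() above: the columns of
   the off-diagonal block B whose global numbers (through garray) fall below
   cstart are copied first, then the columns of the diagonal block A shifted
   by cstart, then the remaining columns of B.  Because A and B are each kept
   sorted, the merged row is sorted by increasing global column number.  For
   illustration, with cstart = 4, a row holding B entries in global columns 2
   and 9 and A entries in local columns 0 and 1 is returned with the global
   columns 2, 4, 5, 9.
*/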
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose ... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}
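
/*
   The scaling above forms  mat <- diag(ll)*mat*diag(rr)  block by block: the
   diagonal block is scaled with the local parts of ll and rr, while the
   off-diagonal block is scaled on the left with ll and on the right with the
   ghost entries of rr gathered into lvec.  Starting the scatter of rr first
   lets the left scaling of the off-diagonal block overlap the communication.
*/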
extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  |  o o o d d d o o o o o o
$   row 4  |  o o o d d d o o o o o o
$   row 5  |  o o o d d d o o o o o o
$          -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
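/*
   Example of a typical call (illustrative only; M and N denote the global
   dimensions and the preallocation numbers are placeholders):

       Mat A;
       int ierr;
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              M,N,5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
       ... set entries with MatSetValues() ...
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/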
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"
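
/*
   Layout of the binary file as the loader below expects it:

       int    header[4]   MAT_COOKIE, M (global rows), N (global columns),
                          and a fourth entry that is not used here
       int    rowlengths  number of nonzeros in each of the M rows
       int    indices     column indices of all nonzeros, row by row
       Scalar values      numerical values of all nonzeros, row by row

   Process 0 reads the file and ships each process its share of the row
   lengths, column indices, and values.
*/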
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}