1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: mpiaij.c,v 1.245 1998/05/11 22:07:18 bsmith Exp bsmith $"; 4 #endif 5 6 #include "pinclude/pviewer.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 11 /* local utility routine that creates a mapping from the global column 12 number to the local number in the off-diagonal part of the local 13 storage of the matrix. This is done in a non scable way since the 14 length of colmap equals the global matrix length. 15 */ 16 #undef __FUNC__ 17 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 18 int CreateColmap_MPIAIJ_Private(Mat mat) 19 { 20 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22 int n = B->n,i; 23 24 PetscFunctionBegin; 25 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26 PLogObjectMemory(mat,aij->N*sizeof(int)); 27 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 29 PetscFunctionReturn(0); 30 } 31 32 extern int DisAssemble_MPIAIJ(Mat); 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for ( _i=0; _i<nrow; _i++ ) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 61 Scalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 67 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 68 new_j = (int *) (new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 73 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int)); \ 78 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 79 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(Scalar)); \ 81 /* free up old matrix storage */ \ 82 \ 83 PetscFree(a->a); \ 84 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = 1; \ 87 \ 88 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 89 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for ( ii=N; ii>=_i; ii-- ) { \ 97 rp[ii+1] = rp[ii]; \ 98 ap[ii+1] = ap[ii]; \ 99 } \ 100 rp[_i] = col1; \ 101 ap[_i] = value; \ 102 
a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 
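  /*
     Route each (im[i],in[j]) entry by ownership: rows owned by this process are
     inserted directly, into the diagonal block aij->A when the column lies in
     [cstart,cend) and into the off-diagonal block aij->B otherwise (translating
     the global column number through colmap once the matrix has previously been
     assembled); rows owned by other processes are stashed, unless
     MAT_IGNORE_OFF_PROC_ENTRIES was set, and communicated during assembly.
  */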
for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290 } 291 } 292 } 293 } else { 294 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 295 } 296 } 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNC__ 301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 303 { 304 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 305 MPI_Comm comm = mat->comm; 306 int size = aij->size, *owners = 
aij->rowners; 307 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 308 MPI_Request *send_waits,*recv_waits; 309 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 310 InsertMode addv; 311 Scalar *rvalues,*svalues; 312 313 PetscFunctionBegin; 314 if (aij->donotstash) { 315 aij->svalues = 0; aij->rvalues = 0; 316 aij->nsends = 0; aij->nrecvs = 0; 317 aij->send_waits = 0; aij->recv_waits = 0; 318 aij->rmax = 0; 319 PetscFunctionReturn(0); 320 } 321 322 /* make sure all processors are either in INSERTMODE or ADDMODE */ 323 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 324 if (addv == (ADD_VALUES|INSERT_VALUES)) { 325 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 326 } 327 mat->insertmode = addv; /* in case this processor had no cache */ 328 329 /* first count number of contributors to each processor */ 330 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 331 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 332 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 333 for ( i=0; i<aij->stash.n; i++ ) { 334 idx = aij->stash.idx[i]; 335 for ( j=0; j<size; j++ ) { 336 if (idx >= owners[j] && idx < owners[j+1]) { 337 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 338 } 339 } 340 } 341 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 342 343 /* inform other processors of number of messages and max length*/ 344 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 345 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 346 nreceives = work[rank]; 347 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 348 nmax = work[rank]; 349 PetscFree(work); 350 351 /* post receives: 352 1) each message will consist of ordered pairs 353 (global index,value) we store the global index as a double 354 to simplify the message passing. 355 2) since we don't know how long each individual message is we 356 allocate the largest needed buffer for each receive. Potentially 357 this is a lot of wasted space. 358 359 360 This could be done better. 
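       Note that each stashed entry is actually packed as a triple
       (global row, global column, value), all stored as Scalars, which is
       why the buffer sizes below carry a factor of 3.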
361 */ 362 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 363 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 364 for ( i=0; i<nreceives; i++ ) { 365 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 366 comm,recv_waits+i);CHKERRQ(ierr); 367 } 368 369 /* do sends: 370 1) starts[i] gives the starting index in svalues for stuff going to 371 the ith processor 372 */ 373 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 374 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 375 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 376 starts[0] = 0; 377 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 for ( i=0; i<aij->stash.n; i++ ) { 379 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 380 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 381 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 382 } 383 PetscFree(owner); 384 starts[0] = 0; 385 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 386 count = 0; 387 for ( i=0; i<size; i++ ) { 388 if (procs[i]) { 389 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 390 } 391 } 392 PetscFree(starts); 393 PetscFree(nprocs); 394 395 /* Free cache space */ 396 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 397 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 398 399 aij->svalues = svalues; aij->rvalues = rvalues; 400 aij->nsends = nsends; aij->nrecvs = nreceives; 401 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 402 aij->rmax = nmax; 403 404 PetscFunctionReturn(0); 405 } 406 extern int MatSetUpMultiply_MPIAIJ(Mat); 407 408 #undef __FUNC__ 409 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 410 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 413 MPI_Status *send_status,recv_status; 414 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 415 int row,col,other_disassembled; 416 Scalar *values,val; 417 InsertMode addv = mat->insertmode; 418 419 PetscFunctionBegin; 420 421 /* wait on receives */ 422 while (count) { 423 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 424 /* unpack receives into our local space */ 425 values = aij->rvalues + 3*imdex*aij->rmax; 426 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 427 n = n/3; 428 for ( i=0; i<n; i++ ) { 429 row = (int) PetscReal(values[3*i]) - aij->rstart; 430 col = (int) PetscReal(values[3*i+1]); 431 val = values[3*i+2]; 432 if (col >= aij->cstart && col < aij->cend) { 433 col -= aij->cstart; 434 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 435 } else { 436 if (mat->was_assembled || mat->assembled) { 437 if (!aij->colmap) { 438 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 439 } 440 col = aij->colmap[col] - 1; 441 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 442 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 443 col = (int) PetscReal(values[3*i+1]); 444 } 445 } 446 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 447 } 448 } 449 count--; 450 } 451 if (aij->recv_waits) PetscFree(aij->recv_waits); 452 if (aij->rvalues) PetscFree(aij->rvalues); 453 454 /* wait on sends */ 455 if (aij->nsends) { 456 send_status = (MPI_Status *) 
PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 457 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 458 PetscFree(send_status); 459 } 460 if (aij->send_waits) PetscFree(aij->send_waits); 461 if (aij->svalues) PetscFree(aij->svalues); 462 463 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 464 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 465 466 /* determine if any processor has disassembled, if so we must 467 also disassemble ourselfs, in order that we may reassemble. */ 468 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 469 if (mat->was_assembled && !other_disassembled) { 470 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 471 } 472 473 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 474 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 475 } 476 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 477 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 478 479 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNC__ 484 #define __FUNC__ "MatZeroEntries_MPIAIJ" 485 int MatZeroEntries_MPIAIJ(Mat A) 486 { 487 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 488 int ierr; 489 490 PetscFunctionBegin; 491 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 492 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNC__ 497 #define __FUNC__ "MatZeroRows_MPIAIJ" 498 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 499 { 500 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 501 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 502 int *procs,*nprocs,j,found,idx,nsends,*work,row; 503 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 504 int *rvalues,tag = A->tag,count,base,slen,n,*source; 505 int *lens,imdex,*lrows,*values,rstart=l->rstart; 506 MPI_Comm comm = A->comm; 507 MPI_Request *send_waits,*recv_waits; 508 MPI_Status recv_status,*send_status; 509 IS istmp; 510 PetscTruth localdiag; 511 512 PetscFunctionBegin; 513 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 514 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 515 516 /* first count number of contributors to each processor */ 517 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 518 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 519 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 520 for ( i=0; i<N; i++ ) { 521 idx = rows[i]; 522 found = 0; 523 for ( j=0; j<size; j++ ) { 524 if (idx >= owners[j] && idx < owners[j+1]) { 525 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 526 } 527 } 528 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 529 } 530 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 531 532 /* inform other processors of number of messages and max length*/ 533 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 534 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 535 nrecvs = work[rank]; 536 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 537 nmax = work[rank]; 538 PetscFree(work); 539 540 /* post receives: */ 541 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 542 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 543 for ( i=0; i<nrecvs; i++ ) { 544 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 545 } 546 547 /* do sends: 
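       each row index to be zeroed is packed into svalues grouped by owning
       process;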
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with many small mallocs and slow
     things down.

     Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart;
      MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
    }
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/ 703 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNC__ 708 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 709 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 710 { 711 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 712 int ierr; 713 714 PetscFunctionBegin; 715 /* do nondiagonal part */ 716 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 717 /* send it on its way */ 718 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 719 /* do local part */ 720 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 721 /* receive remote parts: note this assumes the values are not actually */ 722 /* inserted in yy until the next line, which is true for my implementation*/ 723 /* but is not perhaps always true. */ 724 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 /* 729 This only works correctly for square matrices where the subblock A->A is the 730 diagonal block 731 */ 732 #undef __FUNC__ 733 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 734 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 735 { 736 int ierr; 737 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 738 739 PetscFunctionBegin; 740 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 741 if (a->rstart != a->cstart || a->rend != a->cend) { 742 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 743 } 744 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNC__ 749 #define __FUNC__ "MatScale_MPIAIJ" 750 int MatScale_MPIAIJ(Scalar *aa,Mat A) 751 { 752 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 753 int ierr; 754 755 PetscFunctionBegin; 756 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 757 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNC__ 762 #define __FUNC__ "MatDestroy_MPIAIJ" 763 int MatDestroy_MPIAIJ(Mat mat) 764 { 765 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 766 int ierr; 767 768 PetscFunctionBegin; 769 #if defined(USE_PETSC_LOG) 770 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 771 #endif 772 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 773 PetscFree(aij->rowners); 774 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 775 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 776 if (aij->colmap) PetscFree(aij->colmap); 777 if (aij->garray) PetscFree(aij->garray); 778 if (aij->lvec) VecDestroy(aij->lvec); 779 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 780 if (aij->rowvalues) PetscFree(aij->rowvalues); 781 PetscFree(aij); 782 PLogObjectDestroy(mat); 783 PetscHeaderDestroy(mat); 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNC__ 788 #define __FUNC__ "MatView_MPIAIJ_Binary" 789 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 790 { 791 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 792 int ierr; 793 794 PetscFunctionBegin; 795 if (aij->size == 1) { 796 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 797 } 798 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNC__ 803 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 804 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 805 { 806 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 807 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 808 int ierr, format,shift = C->indexshift,rank; 809 FILE *fd; 810 ViewerType vtype; 811 812 
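  /*
     Dispatch on viewer type and format: the ASCII info formats print only
     per-process nonzero statistics, an ASCII_FILE viewer has each process
     print its own diagonal and off-diagonal blocks, and the remaining cases
     either view aij->A directly (one process) or assemble a copy of the
     entire matrix onto the first process and view that.
  */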
PetscFunctionBegin; 813 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 814 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 815 ierr = ViewerGetFormat(viewer,&format); 816 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 817 MatInfo info; 818 int flg; 819 MPI_Comm_rank(mat->comm,&rank); 820 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 821 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 822 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 823 PetscSequentialPhaseBegin(mat->comm,1); 824 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 825 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 826 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 827 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 828 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 829 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 830 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 831 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 832 fflush(fd); 833 PetscSequentialPhaseEnd(mat->comm,1); 834 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 837 PetscFunctionReturn(0); 838 } 839 } 840 841 if (vtype == DRAW_VIEWER) { 842 Draw draw; 843 PetscTruth isnull; 844 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 845 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 846 } 847 848 if (vtype == ASCII_FILE_VIEWER) { 849 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 850 PetscSequentialPhaseBegin(mat->comm,1); 851 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 852 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 853 aij->cend); 854 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 855 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 856 fflush(fd); 857 PetscSequentialPhaseEnd(mat->comm,1); 858 } else { 859 int size = aij->size; 860 rank = aij->rank; 861 if (size == 1) { 862 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 863 } else { 864 /* assemble the entire matrix onto first processor. 
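         Note: this makes a complete copy of the matrix on one process, so it
         is only practical for viewing relatively small matrices.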
      */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int         ierr;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration.
     3) coloring, updating outer values between colors.
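     At present only the local sweeps below are implemented: the off-process
     values are obtained with a single scatter and then held fixed while SOR
     is applied to the locally owned rows; a true parallel SOR is not
     supported and returns an error.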
941 */ 942 #undef __FUNC__ 943 #define __FUNC__ "MatRelax_MPIAIJ" 944 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 945 double fshift,int its,Vec xx) 946 { 947 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 948 Mat AA = mat->A, BB = mat->B; 949 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 950 Scalar *b,*x,*xs,*ls,d,*v,sum; 951 int ierr,*idx, *diag; 952 int n = mat->n, m = mat->m, i,shift = A->indexshift; 953 954 PetscFunctionBegin; 955 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 956 xs = x + shift; /* shift by one for index start of 1 */ 957 ls = ls + shift; 958 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 959 diag = A->diag; 960 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 961 if (flag & SOR_ZERO_INITIAL_GUESS) { 962 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 966 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 967 while (its--) { 968 /* go down through the rows */ 969 for ( i=0; i<m; i++ ) { 970 n = A->i[i+1] - A->i[i]; 971 idx = A->j + A->i[i] + shift; 972 v = A->a + A->i[i] + shift; 973 sum = b[i]; 974 SPARSEDENSEMDOT(sum,xs,v,idx,n); 975 d = fshift + A->a[diag[i]+shift]; 976 n = B->i[i+1] - B->i[i]; 977 idx = B->j + B->i[i] + shift; 978 v = B->a + B->i[i] + shift; 979 SPARSEDENSEMDOT(sum,ls,v,idx,n); 980 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 981 } 982 /* come up through the rows */ 983 for ( i=m-1; i>-1; i-- ) { 984 n = A->i[i+1] - A->i[i]; 985 idx = A->j + A->i[i] + shift; 986 v = A->a + A->i[i] + shift; 987 sum = b[i]; 988 SPARSEDENSEMDOT(sum,xs,v,idx,n); 989 d = fshift + A->a[diag[i]+shift]; 990 n = B->i[i+1] - B->i[i]; 991 idx = B->j + B->i[i] + shift; 992 v = B->a + B->i[i] + shift; 993 SPARSEDENSEMDOT(sum,ls,v,idx,n); 994 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 995 } 996 } 997 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 998 if (flag & SOR_ZERO_INITIAL_GUESS) { 999 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1004 while (its--) { 1005 for ( i=0; i<m; i++ ) { 1006 n = A->i[i+1] - A->i[i]; 1007 idx = A->j + A->i[i] + shift; 1008 v = A->a + A->i[i] + shift; 1009 sum = b[i]; 1010 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1011 d = fshift + A->a[diag[i]+shift]; 1012 n = B->i[i+1] - B->i[i]; 1013 idx = B->j + B->i[i] + shift; 1014 v = B->a + B->i[i] + shift; 1015 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1016 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1017 } 1018 } 1019 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1020 if (flag & SOR_ZERO_INITIAL_GUESS) { 1021 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1025 mat->Mvctx); CHKERRQ(ierr); 1026 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1027 mat->Mvctx); CHKERRQ(ierr); 1028 while (its--) { 1029 for ( i=m-1; i>-1; i-- ) { 1030 n = A->i[i+1] - A->i[i]; 1031 idx = A->j + A->i[i] + shift; 1032 v = A->a + A->i[i] + shift; 1033 sum = b[i]; 1034 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1035 d = fshift + A->a[diag[i]+shift]; 1036 n = B->i[i+1] - B->i[i]; 1037 idx = B->j + B->i[i] + shift; 1038 v = B->a + B->i[i] + shift; 1039 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1040 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1041 } 1042 } 1043 } else { 1044 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1045 } 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNC__ 1050 #define __FUNC__ "MatGetInfo_MPIAIJ" 1051 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1052 { 1053 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1054 Mat A = mat->A, B = mat->B; 1055 int ierr; 1056 double isend[5], irecv[5]; 1057 1058 PetscFunctionBegin; 1059 info->block_size = 1.0; 1060 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1061 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1062 isend[3] = info->memory; isend[4] = info->mallocs; 1063 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1064 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1065 isend[3] += info->memory; isend[4] += info->mallocs; 1066 if (flag == MAT_LOCAL) { 1067 info->nz_used = isend[0]; 1068 info->nz_allocated = isend[1]; 1069 info->nz_unneeded = isend[2]; 1070 info->memory = isend[3]; 1071 info->mallocs = isend[4]; 1072 } else if (flag == MAT_GLOBAL_MAX) { 1073 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1074 info->nz_used = irecv[0]; 1075 info->nz_allocated = irecv[1]; 1076 info->nz_unneeded = irecv[2]; 1077 info->memory = irecv[3]; 1078 info->mallocs = irecv[4]; 1079 } else if (flag == MAT_GLOBAL_SUM) { 1080 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1081 info->nz_used = irecv[0]; 1082 info->nz_allocated = irecv[1]; 1083 info->nz_unneeded = irecv[2]; 1084 info->memory = irecv[3]; 1085 info->mallocs = irecv[4]; 1086 } 1087 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1088 info->fill_ratio_needed = 0; 1089 info->factor_mallocs = 0; 1090 info->rows_global = (double)mat->M; 1091 info->columns_global = (double)mat->N; 1092 info->rows_local = (double)mat->m; 1093 info->columns_local = (double)mat->N; 1094 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNC__ 1099 #define __FUNC__ "MatSetOption_MPIAIJ" 1100 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1101 { 1102 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1103 1104 PetscFunctionBegin; 1105 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1106 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1107 op == MAT_COLUMNS_UNSORTED || 1108 op == MAT_COLUMNS_SORTED || 1109 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1110 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1111 MatSetOption(a->A,op); 1112 MatSetOption(a->B,op); 1113 } else if (op == MAT_ROW_ORIENTED) { 1114 a->roworiented = 1; 1115 
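    /* forward the orientation to both the diagonal (A) and off-diagonal (B) blocks */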
MatSetOption(a->A,op); 1116 MatSetOption(a->B,op); 1117 } else if (op == MAT_ROWS_SORTED || 1118 op == MAT_ROWS_UNSORTED || 1119 op == MAT_SYMMETRIC || 1120 op == MAT_STRUCTURALLY_SYMMETRIC || 1121 op == MAT_YES_NEW_DIAGONALS) 1122 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1123 else if (op == MAT_COLUMN_ORIENTED) { 1124 a->roworiented = 0; 1125 MatSetOption(a->A,op); 1126 MatSetOption(a->B,op); 1127 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1128 a->donotstash = 1; 1129 } else if (op == MAT_NO_NEW_DIAGONALS){ 1130 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1131 } else { 1132 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1133 } 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNC__ 1138 #define __FUNC__ "MatGetSize_MPIAIJ" 1139 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1140 { 1141 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1142 1143 PetscFunctionBegin; 1144 if (m) *m = mat->M; 1145 if (n) *n = mat->N; 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNC__ 1150 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1151 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1152 { 1153 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1154 1155 PetscFunctionBegin; 1156 if (m) *m = mat->m; 1157 if (n) *n = mat->n; 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNC__ 1162 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1163 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1164 { 1165 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1166 1167 PetscFunctionBegin; 1168 *m = mat->rstart; *n = mat->rend; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1173 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1174 1175 #undef __FUNC__ 1176 #define __FUNC__ "MatGetRow_MPIAIJ" 1177 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1178 { 1179 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1180 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1181 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1182 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1183 int *cmap, *idx_p; 1184 1185 PetscFunctionBegin; 1186 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1187 mat->getrowactive = PETSC_TRUE; 1188 1189 if (!mat->rowvalues && (idx || v)) { 1190 /* 1191 allocate enough space to hold information from the longest row. 
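         (each row of the parallel matrix is returned as the merge of the
         corresponding rows of the diagonal block A and the off-diagonal
         block B, so the longest combined row bounds both the value and the
         column-index workspace)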
1192 */ 1193 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1194 int max = 1,m = mat->m,tmp; 1195 for ( i=0; i<m; i++ ) { 1196 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1197 if (max < tmp) { max = tmp; } 1198 } 1199 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1200 CHKPTRQ(mat->rowvalues); 1201 mat->rowindices = (int *) (mat->rowvalues + max); 1202 } 1203 1204 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1205 lrow = row - rstart; 1206 1207 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1208 if (!v) {pvA = 0; pvB = 0;} 1209 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1210 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1211 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1212 nztot = nzA + nzB; 1213 1214 cmap = mat->garray; 1215 if (v || idx) { 1216 if (nztot) { 1217 /* Sort by increasing column numbers, assuming A and B already sorted */ 1218 int imark = -1; 1219 if (v) { 1220 *v = v_p = mat->rowvalues; 1221 for ( i=0; i<nzB; i++ ) { 1222 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1223 else break; 1224 } 1225 imark = i; 1226 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1227 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1228 } 1229 if (idx) { 1230 *idx = idx_p = mat->rowindices; 1231 if (imark > -1) { 1232 for ( i=0; i<imark; i++ ) { 1233 idx_p[i] = cmap[cworkB[i]]; 1234 } 1235 } else { 1236 for ( i=0; i<nzB; i++ ) { 1237 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1238 else break; 1239 } 1240 imark = i; 1241 } 1242 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1243 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1244 } 1245 } 1246 else { 1247 if (idx) *idx = 0; 1248 if (v) *v = 0; 1249 } 1250 } 1251 *nz = nztot; 1252 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1253 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNC__ 1258 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1259 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1260 { 1261 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1262 1263 PetscFunctionBegin; 1264 if (aij->getrowactive == PETSC_FALSE) { 1265 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1266 } 1267 aij->getrowactive = PETSC_FALSE; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNC__ 1272 #define __FUNC__ "MatNorm_MPIAIJ" 1273 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1276 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1277 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1278 double sum = 0.0; 1279 Scalar *v; 1280 1281 PetscFunctionBegin; 1282 if (aij->size == 1) { 1283 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1284 } else { 1285 if (type == NORM_FROBENIUS) { 1286 v = amat->a; 1287 for (i=0; i<amat->nz; i++ ) { 1288 #if defined(USE_PETSC_COMPLEX) 1289 sum += real(conj(*v)*(*v)); v++; 1290 #else 1291 sum += (*v)*(*v); v++; 1292 #endif 1293 } 1294 v = bmat->a; 1295 for (i=0; i<bmat->nz; i++ ) { 1296 #if defined(USE_PETSC_COMPLEX) 1297 sum += real(conj(*v)*(*v)); v++; 1298 #else 1299 sum += (*v)*(*v); v++; 1300 #endif 1301 } 1302 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1303 *norm = sqrt(*norm); 1304 } else if (type == NORM_1) { /* max 
column norm */ 1305 double *tmp, *tmp2; 1306 int *jj, *garray = aij->garray; 1307 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1308 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1309 PetscMemzero(tmp,aij->N*sizeof(double)); 1310 *norm = 0.0; 1311 v = amat->a; jj = amat->j; 1312 for ( j=0; j<amat->nz; j++ ) { 1313 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1314 } 1315 v = bmat->a; jj = bmat->j; 1316 for ( j=0; j<bmat->nz; j++ ) { 1317 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1318 } 1319 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1320 for ( j=0; j<aij->N; j++ ) { 1321 if (tmp2[j] > *norm) *norm = tmp2[j]; 1322 } 1323 PetscFree(tmp); PetscFree(tmp2); 1324 } else if (type == NORM_INFINITY) { /* max row norm */ 1325 double ntemp = 0.0; 1326 for ( j=0; j<amat->m; j++ ) { 1327 v = amat->a + amat->i[j] + shift; 1328 sum = 0.0; 1329 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1330 sum += PetscAbsScalar(*v); v++; 1331 } 1332 v = bmat->a + bmat->i[j] + shift; 1333 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1334 sum += PetscAbsScalar(*v); v++; 1335 } 1336 if (sum > ntemp) ntemp = sum; 1337 } 1338 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1339 } else { 1340 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1341 } 1342 } 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNC__ 1347 #define __FUNC__ "MatTranspose_MPIAIJ" 1348 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1349 { 1350 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1351 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1352 int ierr,shift = Aloc->indexshift; 1353 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1354 Mat B; 1355 Scalar *array; 1356 1357 PetscFunctionBegin; 1358 if (matout == PETSC_NULL && M != N) { 1359 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1360 } 1361 1362 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1363 1364 /* copy over the A part */ 1365 Aloc = (Mat_SeqAIJ*) a->A->data; 1366 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1367 row = a->rstart; 1368 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1369 for ( i=0; i<m; i++ ) { 1370 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1371 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1372 } 1373 aj = Aloc->j; 1374 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1375 1376 /* copy over the B part */ 1377 Aloc = (Mat_SeqAIJ*) a->B->data; 1378 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1379 row = a->rstart; 1380 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1381 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1382 for ( i=0; i<m; i++ ) { 1383 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1385 } 1386 PetscFree(ct); 1387 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1388 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1389 if (matout != PETSC_NULL) { 1390 *matout = B; 1391 } else { 1392 PetscOps *Abops; 1393 struct _MatOps *Aops; 1394 1395 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1396 PetscFree(a->rowners); 1397 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1398 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1399 if (a->colmap) PetscFree(a->colmap); 1400 if (a->garray) PetscFree(a->garray); 1401 if (a->lvec) VecDestroy(a->lvec); 1402 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1403 PetscFree(a); 1404 1405 /* 1406 This is horrible, horrible code. We need to keep the 1407 A pointers for the bops and ops but copy everything 1408 else from C. 1409 */ 1410 Abops = A->bops; 1411 Aops = A->ops; 1412 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1413 A->bops = Abops; 1414 A->ops = Aops; 1415 PetscHeaderDestroy(B); 1416 } 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNC__ 1421 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1422 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1423 { 1424 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1425 Mat a = aij->A, b = aij->B; 1426 int ierr,s1,s2,s3; 1427 1428 PetscFunctionBegin; 1429 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1430 if (rr) { 1431 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1432 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1433 /* Overlap communication with computation. */ 1434 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1435 } 1436 if (ll) { 1437 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1438 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1439 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1440 } 1441 /* scale the diagonal block */ 1442 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1443 1444 if (rr) { 1445 /* Do a scatter end and then right scale the off-diagonal block */ 1446 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1447 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1448 } 1449 1450 PetscFunctionReturn(0); 1451 } 1452 1453 1454 extern int MatPrintHelp_SeqAIJ(Mat); 1455 #undef __FUNC__ 1456 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1457 int MatPrintHelp_MPIAIJ(Mat A) 1458 { 1459 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1460 int ierr; 1461 1462 PetscFunctionBegin; 1463 if (!a->rank) { 1464 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNC__ 1470 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1471 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1472 { 1473 PetscFunctionBegin; 1474 *bs = 1; 1475 PetscFunctionReturn(0); 1476 } 1477 #undef __FUNC__ 1478 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1479 int MatSetUnfactored_MPIAIJ(Mat A) 1480 { 1481 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1482 int ierr; 1483 1484 PetscFunctionBegin; 1485 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNC__ 1490 #define __FUNC__ "MatEqual_MPIAIJ" 1491 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1492 { 1493 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1494 Mat a, b, c, d; 1495 PetscTruth flg; 1496 int ierr; 1497 1498 PetscFunctionBegin; 1499 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1500 a = matA->A; b = matA->B; 1501 c = matB->A; d = matB->B; 1502 1503 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1504 if (flg == PETSC_TRUE) { 1505 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1506 } 1507 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1508 PetscFunctionReturn(0); 1509 } 1510 1511 extern int 
MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat, int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int, IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.
1586 1587 The user MUST specify either the local or global matrix dimensions 1588 (possibly both). 1589 1590 By default, this format uses inodes (identical nodes) when possible. 1591 We search for consecutive rows with the same nonzero structure, thereby 1592 reusing matrix information to achieve increased efficiency. 1593 1594 Options Database Keys: 1595 + -mat_aij_no_inode - Do not use inodes 1596 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1597 - -mat_aij_oneindex - Internally use indexing starting at 1 1598 rather than 0. Note that when calling MatSetValues(), 1599 the user still MUST index entries starting at 0! 1600 1601 Storage Information: 1602 For a square global matrix we define each processor's diagonal portion 1603 to be its local rows and the corresponding columns (a square submatrix); 1604 each processor's off-diagonal portion encompasses the remainder of the 1605 local matrix (a rectangular submatrix). 1606 1607 The user can specify preallocated storage for the diagonal part of 1608 the local submatrix with either d_nz or d_nnz (not both). Set 1609 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1610 memory allocation. Likewise, specify preallocated storage for the 1611 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1612 1613 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1614 the figure below we depict these three local rows and all columns (0-11). 1615 1616 .vb 1617 0 1 2 3 4 5 6 7 8 9 10 11 1618 ------------------- 1619 row 3 | o o o d d d o o o o o o 1620 row 4 | o o o d d d o o o o o o 1621 row 5 | o o o d d d o o o o o o 1622 ------------------- 1623 .ve 1624 1625 Thus, any entries in the d locations are stored in the d (diagonal) 1626 submatrix, and any entries in the o locations are stored in the 1627 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1628 stored simply in the MATSEQAIJ format for compressed row storage. 1629 1630 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1631 and o_nz should indicate the number of nonzeros per row in the o matrix. 1632 In general, for PDE problems in which most nonzeros are near the diagonal, 1633 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1634 or you will get TERRIBLE performance; see the users' manual chapter on 1635 matrices. 
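
   Example usage, setting up a square matrix of global size M and inserting
   a unit diagonal (an illustrative sketch only; error checking omitted):
.vb
      Mat    A;
      int    rstart, rend, row;
      Scalar one = 1.0;

      MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,
                      5,PETSC_NULL,2,PETSC_NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for ( row=rstart; row<rend; row++ ) {
        MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve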
1636 1637 .keywords: matrix, aij, compressed row, sparse, parallel 1638 1639 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1640 @*/ 1641 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1642 { 1643 Mat B; 1644 Mat_MPIAIJ *b; 1645 int ierr, i,sum[2],work[2],size; 1646 1647 PetscFunctionBegin; 1648 MPI_Comm_size(comm,&size); 1649 if (size == 1) { 1650 if (M == PETSC_DECIDE) M = m; 1651 if (N == PETSC_DECIDE) N = n; 1652 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 *A = 0; 1657 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1658 PLogObjectCreate(B); 1659 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1660 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1661 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1662 B->ops->destroy = MatDestroy_MPIAIJ; 1663 B->ops->view = MatView_MPIAIJ; 1664 B->factor = 0; 1665 B->assembled = PETSC_FALSE; 1666 B->mapping = 0; 1667 1668 B->insertmode = NOT_SET_VALUES; 1669 b->size = size; 1670 MPI_Comm_rank(comm,&b->rank); 1671 1672 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1673 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1674 } 1675 1676 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1677 work[0] = m; work[1] = n; 1678 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1679 if (M == PETSC_DECIDE) M = sum[0]; 1680 if (N == PETSC_DECIDE) N = sum[1]; 1681 } 1682 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1683 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1684 b->m = m; B->m = m; 1685 b->n = n; B->n = n; 1686 b->N = N; B->N = N; 1687 b->M = M; B->M = M; 1688 1689 /* build local table of row and column ownerships */ 1690 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1691 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1692 b->cowners = b->rowners + b->size + 2; 1693 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1694 b->rowners[0] = 0; 1695 for ( i=2; i<=b->size; i++ ) { 1696 b->rowners[i] += b->rowners[i-1]; 1697 } 1698 b->rstart = b->rowners[b->rank]; 1699 b->rend = b->rowners[b->rank+1]; 1700 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1701 b->cowners[0] = 0; 1702 for ( i=2; i<=b->size; i++ ) { 1703 b->cowners[i] += b->cowners[i-1]; 1704 } 1705 b->cstart = b->cowners[b->rank]; 1706 b->cend = b->cowners[b->rank+1]; 1707 1708 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1709 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1710 PLogObjectParent(B,b->A); 1711 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1712 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1713 PLogObjectParent(B,b->B); 1714 1715 /* build cache for off array entries formed */ 1716 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1717 b->donotstash = 0; 1718 b->colmap = 0; 1719 b->garray = 0; 1720 b->roworiented = 1; 1721 1722 /* stuff used for matrix vector multiply */ 1723 b->lvec = 0; 1724 b->Mvctx = 0; 1725 1726 /* stuff for MatGetRow() */ 1727 b->rowindices = 0; 1728 b->rowvalues = 0; 1729 b->getrowactive = PETSC_FALSE; 1730 1731 *A = B; 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNC__ 1736 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1737 int MatConvertSameType_MPIAIJ(Mat matin,Mat 
*newmat,int cpvalues) 1738 { 1739 Mat mat; 1740 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1741 int ierr, len=0, flg; 1742 1743 PetscFunctionBegin; 1744 *newmat = 0; 1745 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1746 PLogObjectCreate(mat); 1747 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1748 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 1749 mat->ops->destroy = MatDestroy_MPIAIJ; 1750 mat->ops->view = MatView_MPIAIJ; 1751 mat->factor = matin->factor; 1752 mat->assembled = PETSC_TRUE; 1753 1754 a->m = mat->m = oldmat->m; 1755 a->n = mat->n = oldmat->n; 1756 a->M = mat->M = oldmat->M; 1757 a->N = mat->N = oldmat->N; 1758 1759 a->rstart = oldmat->rstart; 1760 a->rend = oldmat->rend; 1761 a->cstart = oldmat->cstart; 1762 a->cend = oldmat->cend; 1763 a->size = oldmat->size; 1764 a->rank = oldmat->rank; 1765 mat->insertmode = NOT_SET_VALUES; 1766 a->rowvalues = 0; 1767 a->getrowactive = PETSC_FALSE; 1768 1769 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1770 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1771 a->cowners = a->rowners + a->size + 2; 1772 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1773 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1774 if (oldmat->colmap) { 1775 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1776 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1777 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1778 } else a->colmap = 0; 1779 if (oldmat->garray) { 1780 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1781 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1782 PLogObjectMemory(mat,len*sizeof(int)); 1783 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1784 } else a->garray = 0; 1785 1786 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1787 PLogObjectParent(mat,a->lvec); 1788 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1789 PLogObjectParent(mat,a->Mvctx); 1790 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1791 PLogObjectParent(mat,a->A); 1792 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1793 PLogObjectParent(mat,a->B); 1794 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1795 if (flg) { 1796 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1797 } 1798 *newmat = mat; 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #include "sys.h" 1803 1804 #undef __FUNC__ 1805 #define __FUNC__ "MatLoad_MPIAIJ" 1806 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1807 { 1808 Mat A; 1809 Scalar *vals,*svals; 1810 MPI_Comm comm = ((PetscObject)viewer)->comm; 1811 MPI_Status status; 1812 int i, nz, ierr, j,rstart, rend, fd; 1813 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1814 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1815 int tag = ((PetscObject)viewer)->tag; 1816 1817 PetscFunctionBegin; 1818 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1819 if (!rank) { 1820 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1821 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1822 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1823 if (header[3] < 0) { 1824 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1825 } 1826 } 1827 1828 1829 ierr = 
MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1830   M = header[1]; N = header[2];
1831   /* determine ownership of all rows */
1832   m = M/size + ((M % size) > rank);
1833   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1834   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1835   rowners[0] = 0;
1836   for ( i=2; i<=size; i++ ) {
1837     rowners[i] += rowners[i-1];
1838   }
1839   rstart = rowners[rank];
1840   rend   = rowners[rank+1];
1841 
1842   /* distribute row lengths to all processors */
1843   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1844   offlens = ourlens + (rend-rstart);
1845   if (!rank) {
1846     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1847     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1848     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1849     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1850     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1851     PetscFree(sndcounts);
1852   } else {
1853     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1854   }
1855 
1856   if (!rank) {
1857     /* calculate the number of nonzeros on each processor */
1858     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1859     PetscMemzero(procsnz,size*sizeof(int));
1860     for ( i=0; i<size; i++ ) {
1861       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1862         procsnz[i] += rowlengths[j];
1863       }
1864     }
1865     PetscFree(rowlengths);
1866 
1867     /* determine max buffer needed and allocate it */
1868     maxnz = 0;
1869     for ( i=0; i<size; i++ ) {
1870       maxnz = PetscMax(maxnz,procsnz[i]);
1871     }
1872     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1873 
1874     /* read in my part of the matrix column indices */
1875     nz = procsnz[0];
1876     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1877     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1878 
1879     /* read in everyone else's part and ship it off */
1880     for ( i=1; i<size; i++ ) {
1881       nz = procsnz[i];
1882       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1883       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1884     }
1885     PetscFree(cols);
1886   } else {
1887     /* determine buffer space needed for message */
1888     nz = 0;
1889     for ( i=0; i<m; i++ ) {
1890       nz += ourlens[i];
1891     }
1892     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1893 
1894     /* receive message of column indices */
1895     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1896     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1897     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1898   }
1899 
1900   /* loop over local rows, determining number of off-diagonal entries */
1901   PetscMemzero(offlens,m*sizeof(int));
1902   jj = 0;
1903   for ( i=0; i<m; i++ ) {
1904     for ( j=0; j<ourlens[i]; j++ ) {
1905       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1906       jj++;
1907     }
1908   }
1909 
1910   /* create our matrix */
1911   for ( i=0; i<m; i++ ) {
1912     ourlens[i] -= offlens[i];
1913   }
1914   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1915   A = *newmat;
1916   MatSetOption(A,MAT_COLUMNS_SORTED);
1917   for ( i=0; i<m; i++ ) {
1918     ourlens[i] += offlens[i];
1919   }
1920 
1921   if (!rank) {
1922     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1923 
1924     /* read in my part of the matrix
numerical values */
1925     nz = procsnz[0];
1926     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1927 
1928     /* insert into matrix */
1929     jj      = rstart;
1930     smycols = mycols;
1931     svals   = vals;
1932     for ( i=0; i<m; i++ ) {
1933       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1934       smycols += ourlens[i];
1935       svals   += ourlens[i];
1936       jj++;
1937     }
1938 
1939     /* read in the other processors' parts and ship them out */
1940     for ( i=1; i<size; i++ ) {
1941       nz = procsnz[i];
1942       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1943       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1944     }
1945     PetscFree(procsnz);
1946   } else {
1947     /* receive numeric values */
1948     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1949 
1950     /* receive message of values */
1951     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1952     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1953     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1954 
1955     /* insert into matrix */
1956     jj      = rstart;
1957     smycols = mycols;
1958     svals   = vals;
1959     for ( i=0; i<m; i++ ) {
1960       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1961       smycols += ourlens[i];
1962       svals   += ourlens[i];
1963       jj++;
1964     }
1965   }
1966   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1967 
1968   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1969   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNC__
1974 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1975 /*
1976     Not great since it makes two copies of the submatrix: first a local SeqAIJ on
1977     each processor, then the final result obtained by concatenating those local matrices.
1978 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1979 */ 1980 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 1981 { 1982 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1983 Mat *local,M, Mreuse; 1984 Scalar *vwork,*aa; 1985 MPI_Comm comm = mat->comm; 1986 Mat_SeqAIJ *aij; 1987 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1988 1989 PetscFunctionBegin; 1990 MPI_Comm_rank(comm,&rank); 1991 MPI_Comm_size(comm,&size); 1992 1993 if (call == MAT_REUSE_MATRIX) { 1994 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1995 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 1996 local = &Mreuse; 1997 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1998 } else { 1999 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2000 Mreuse = *local; 2001 PetscFree(local); 2002 } 2003 2004 /* 2005 m - number of local rows 2006 n - number of columns (same on all processors) 2007 rstart - first row in new global matrix generated 2008 */ 2009 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2010 if (call == MAT_INITIAL_MATRIX) { 2011 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2012 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2013 ii = aij->i; 2014 jj = aij->j; 2015 2016 /* 2017 Determine the number of non-zeros in the diagonal and off-diagonal 2018 portions of the matrix in order to do correct preallocation 2019 */ 2020 2021 /* first get start and end of "diagonal" columns */ 2022 if (csize == PETSC_DECIDE) { 2023 nlocal = n/size + ((n % size) > rank); 2024 } else { 2025 nlocal = csize; 2026 } 2027 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2028 rstart = rend - nlocal; 2029 if (rank == size - 1 && rend != n) { 2030 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2031 } 2032 2033 /* next, compute all the lengths */ 2034 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2035 olens = dlens + m; 2036 for ( i=0; i<m; i++ ) { 2037 jend = ii[i+1] - ii[i]; 2038 olen = 0; 2039 dlen = 0; 2040 for ( j=0; j<jend; j++ ) { 2041 if ( *jj < rstart || *jj >= rend) olen++; 2042 else dlen++; 2043 jj++; 2044 } 2045 olens[i] = olen; 2046 dlens[i] = dlen; 2047 } 2048 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2049 PetscFree(dlens); 2050 } else { 2051 int ml,nl; 2052 2053 M = *newmat; 2054 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2055 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2056 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2057 /* 2058 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2059 rather than the slower MatSetValues(). 
2060 */ 2061 M->was_assembled = PETSC_TRUE; 2062 M->assembled = PETSC_FALSE; 2063 } 2064 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2065 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2066 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2067 ii = aij->i; 2068 jj = aij->j; 2069 aa = aij->a; 2070 for (i=0; i<m; i++) { 2071 row = rstart + i; 2072 nz = ii[i+1] - ii[i]; 2073 cwork = jj; jj += nz; 2074 vwork = aa; aa += nz; 2075 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2076 } 2077 2078 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2079 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2080 *newmat = M; 2081 2082 /* save submatrix used in processor for next request */ 2083 if (call == MAT_INITIAL_MATRIX) { 2084 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2085 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2086 } 2087 2088 PetscFunctionReturn(0); 2089 } 2090