
#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.229 1997/12/04 00:28:53 balay Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* 
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of columns of the matrix.
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(a->a); \
    if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = 1; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 
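  /*
     The loop below routes each incoming (row,column,value) triple to one of
     three places: the local "diagonal" block aij->A when the global column
     lies in [cstart,cend), the local "off-diagonal" block aij->B for other
     columns of locally owned rows, or the stash for rows owned by another
     processor.  A minimal sketch of that routing (r and c denote a global
     row and column; they are not variables of this routine):

         if (r >= rstart && r < rend) {
           if (c >= cstart && c < cend)
             store (r-rstart, c-cstart, value) in the SeqAIJ matrix aij->A;
           else
             store (r-rstart, local column from colmap/garray, value) in aij->B;
         } else {
           stash (r, c, value); it is sent during MatAssemblyBegin/End;
         }
  */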
for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } 238 else col = in[j]; 239 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 240 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 241 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 242 } 243 } 244 } 245 else { 246 if (roworiented && !aij->donotstash) { 247 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 248 } 249 else { 250 if (!aij->donotstash) { 251 row = im[i]; 252 for ( j=0; j<n; j++ ) { 253 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 254 } 255 } 256 } 257 } 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatGetValues_MPIAIJ" 264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265 { 266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268 int cstart = aij->cstart, cend = aij->cend,row,col; 269 270 PetscFunctionBegin; 271 for ( i=0; i<m; i++ ) { 272 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 273 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 274 if (idxm[i] >= rstart && idxm[i] < rend) { 275 row = idxm[i] - rstart; 276 for ( j=0; j<n; j++ ) { 277 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 278 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 279 if (idxn[j] >= cstart && idxn[j] < cend){ 280 col = idxn[j] - cstart; 281 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 282 } 283 else { 284 if (!aij->colmap) { 285 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 286 } 287 col = aij->colmap[idxn[j]] - 1; 288 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 289 else { 290 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } 292 } 293 } 294 } else { 295 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 296 } 297 } 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306 MPI_Comm comm = mat->comm; 307 int size = aij->size, *owners = 
aij->rowners; 308 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 309 MPI_Request *send_waits,*recv_waits; 310 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 311 InsertMode addv; 312 Scalar *rvalues,*svalues; 313 314 PetscFunctionBegin; 315 /* make sure all processors are either in INSERTMODE or ADDMODE */ 316 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 317 if (addv == (ADD_VALUES|INSERT_VALUES)) { 318 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 319 } 320 mat->insertmode = addv; /* in case this processor had no cache */ 321 322 /* first count number of contributors to each processor */ 323 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 324 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 325 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 326 for ( i=0; i<aij->stash.n; i++ ) { 327 idx = aij->stash.idx[i]; 328 for ( j=0; j<size; j++ ) { 329 if (idx >= owners[j] && idx < owners[j+1]) { 330 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 331 } 332 } 333 } 334 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 335 336 /* inform other processors of number of messages and max length*/ 337 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 338 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 339 nreceives = work[rank]; 340 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 341 nmax = work[rank]; 342 PetscFree(work); 343 344 /* post receives: 345 1) each message will consist of ordered pairs 346 (global index,value) we store the global index as a double 347 to simplify the message passing. 348 2) since we don't know how long each individual message is we 349 allocate the largest needed buffer for each receive. Potentially 350 this is a lot of wasted space. 351 352 353 This could be done better. 
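        Message layout (as packed below and unpacked in MatAssemblyEnd_MPIAIJ):
        each stashed entry occupies three consecutive Scalars,

            svalues[3*k]   = (Scalar) global row index
            svalues[3*k+1] = (Scalar) global column index
            svalues[3*k+2] = numerical value

        so each receive buffer is sized for 3*nmax Scalars, where nmax is the
        largest number of entries any single processor will send to us.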
354 */ 355 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 357 for ( i=0; i<nreceives; i++ ) { 358 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 359 comm,recv_waits+i);CHKERRQ(ierr); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 367 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 368 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 369 starts[0] = 0; 370 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 371 for ( i=0; i<aij->stash.n; i++ ) { 372 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 373 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 374 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 375 } 376 PetscFree(owner); 377 starts[0] = 0; 378 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<size; i++ ) { 381 if (procs[i]) { 382 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 383 } 384 } 385 PetscFree(starts); PetscFree(nprocs); 386 387 /* Free cache space */ 388 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 389 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 390 391 aij->svalues = svalues; aij->rvalues = rvalues; 392 aij->nsends = nsends; aij->nrecvs = nreceives; 393 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 394 aij->rmax = nmax; 395 396 PetscFunctionReturn(0); 397 } 398 extern int MatSetUpMultiply_MPIAIJ(Mat); 399 400 #undef __FUNC__ 401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 403 { 404 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 405 MPI_Status *send_status,recv_status; 406 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 407 int row,col,other_disassembled; 408 Scalar *values,val; 409 InsertMode addv = mat->insertmode; 410 411 PetscFunctionBegin; 412 /* wait on receives */ 413 while (count) { 414 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 415 /* unpack receives into our local space */ 416 values = aij->rvalues + 3*imdex*aij->rmax; 417 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 418 n = n/3; 419 for ( i=0; i<n; i++ ) { 420 row = (int) PetscReal(values[3*i]) - aij->rstart; 421 col = (int) PetscReal(values[3*i+1]); 422 val = values[3*i+2]; 423 if (col >= aij->cstart && col < aij->cend) { 424 col -= aij->cstart; 425 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 426 } else { 427 if (mat->was_assembled || mat->assembled) { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 430 } 431 col = aij->colmap[col] - 1; 432 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 433 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 434 col = (int) PetscReal(values[3*i+1]); 435 } 436 } 437 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 438 } 439 } 440 count--; 441 } 442 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 443 444 /* wait on sends */ 445 if (aij->nsends) { 446 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 447 ierr = 
MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 448 PetscFree(send_status); 449 } 450 PetscFree(aij->send_waits); PetscFree(aij->svalues); 451 452 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 454 455 /* determine if any processor has disassembled, if so we must 456 also disassemble ourselfs, in order that we may reassemble. */ 457 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 458 if (mat->was_assembled && !other_disassembled) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 } 461 462 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 463 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 464 } 465 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 467 468 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNC__ 473 #define __FUNC__ "MatZeroEntries_MPIAIJ" 474 int MatZeroEntries_MPIAIJ(Mat A) 475 { 476 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 477 int ierr; 478 479 PetscFunctionBegin; 480 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 481 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNC__ 486 #define __FUNC__ "MatZeroRows_MPIAIJ" 487 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 488 { 489 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 490 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 491 int *procs,*nprocs,j,found,idx,nsends,*work,row; 492 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 493 int *rvalues,tag = A->tag,count,base,slen,n,*source; 494 int *lens,imdex,*lrows,*values,rstart=l->rstart; 495 MPI_Comm comm = A->comm; 496 MPI_Request *send_waits,*recv_waits; 497 MPI_Status recv_status,*send_status; 498 IS istmp; 499 500 PetscFunctionBegin; 501 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 502 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 503 504 /* first count number of contributors to each processor */ 505 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 506 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 507 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 508 for ( i=0; i<N; i++ ) { 509 idx = rows[i]; 510 found = 0; 511 for ( j=0; j<size; j++ ) { 512 if (idx >= owners[j] && idx < owners[j+1]) { 513 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 514 } 515 } 516 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 517 } 518 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 519 520 /* inform other processors of number of messages and max length*/ 521 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 522 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 523 nrecvs = work[rank]; 524 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 525 nmax = work[rank]; 526 PetscFree(work); 527 528 /* post receives: */ 529 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 530 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 531 for ( i=0; i<nrecvs; i++ ) { 532 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 533 } 534 535 /* do sends: 536 1) starts[i] gives the starting index in svalues for stuff going to 537 the ith processor 538 */ 539 svalues = (int *) PetscMalloc( 
(N+1)*sizeof(int) ); CHKPTRQ(svalues); 540 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 541 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 542 starts[0] = 0; 543 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 544 for ( i=0; i<N; i++ ) { 545 svalues[starts[owner[i]]++] = rows[i]; 546 } 547 ISRestoreIndices(is,&rows); 548 549 starts[0] = 0; 550 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 551 count = 0; 552 for ( i=0; i<size; i++ ) { 553 if (procs[i]) { 554 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 555 } 556 } 557 PetscFree(starts); 558 559 base = owners[rank]; 560 561 /* wait on receives */ 562 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 563 source = lens + nrecvs; 564 count = nrecvs; slen = 0; 565 while (count) { 566 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 567 /* unpack receives into our local space */ 568 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 569 source[imdex] = recv_status.MPI_SOURCE; 570 lens[imdex] = n; 571 slen += n; 572 count--; 573 } 574 PetscFree(recv_waits); 575 576 /* move the data into the send scatter */ 577 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 578 count = 0; 579 for ( i=0; i<nrecvs; i++ ) { 580 values = rvalues + i*nmax; 581 for ( j=0; j<lens[i]; j++ ) { 582 lrows[count++] = values[j] - base; 583 } 584 } 585 PetscFree(rvalues); PetscFree(lens); 586 PetscFree(owner); PetscFree(nprocs); 587 588 /* actually zap the local rows */ 589 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 590 PLogObjectParent(A,istmp); 591 592 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 593 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 594 ierr = ISDestroy(istmp); CHKERRQ(ierr); 595 596 if (diag) { 597 for ( i = 0; i < slen; i++ ) { 598 row = lrows[i] + rstart; 599 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 600 } 601 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 602 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 603 } 604 PetscFree(lrows); 605 /* wait on sends */ 606 if (nsends) { 607 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 608 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 609 PetscFree(send_status); 610 } 611 PetscFree(send_waits); PetscFree(svalues); 612 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNC__ 617 #define __FUNC__ "MatMult_MPIAIJ" 618 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 619 { 620 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 621 int ierr,nt; 622 623 PetscFunctionBegin; 624 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 625 if (nt != a->n) { 626 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 627 } 628 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 629 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 630 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 631 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNC__ 636 #define __FUNC__ "MatMultAdd_MPIAIJ" 637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 638 { 639 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 640 int ierr; 641 642 PetscFunctionBegin; 643 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 644 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); 
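  /*
     As in MatMult_MPIAIJ above, the parallel product is assembled from the
     two sequential blocks:  a->A acts on the locally owned part of xx and
     a->B acts on a->lvec, which holds the ghost (off-processor) entries of
     xx gathered by the scatter context a->Mvctx.  Starting the scatter
     before the local multiply and completing it afterwards overlaps the
     communication with computation.
  */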
CHKERRQ(ierr); 645 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 646 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNC__ 651 #define __FUNC__ "MatMultTrans_MPIAIJ" 652 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 653 { 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 int ierr; 656 657 PetscFunctionBegin; 658 /* do nondiagonal part */ 659 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 660 /* send it on its way */ 661 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 662 /* do local part */ 663 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 664 /* receive remote parts: note this assumes the values are not actually */ 665 /* inserted in yy until the next line, which is true for my implementation*/ 666 /* but is not perhaps always true. */ 667 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 #undef __FUNC__ 672 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 673 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 674 { 675 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 676 int ierr; 677 678 PetscFunctionBegin; 679 /* do nondiagonal part */ 680 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 681 /* send it on its way */ 682 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 683 /* do local part */ 684 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 685 /* receive remote parts: note this assumes the values are not actually */ 686 /* inserted in yy until the next line, which is true for my implementation*/ 687 /* but is not perhaps always true. */ 688 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 /* 693 This only works correctly for square matrices where the subblock A->A is the 694 diagonal block 695 */ 696 #undef __FUNC__ 697 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 698 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 699 { 700 int ierr; 701 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 702 703 PetscFunctionBegin; 704 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 705 if (a->rstart != a->cstart || a->rend != a->cend) { 706 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 707 } 708 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatScale_MPIAIJ" 714 int MatScale_MPIAIJ(Scalar *aa,Mat A) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 717 int ierr; 718 719 PetscFunctionBegin; 720 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 721 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNC__ 726 #define __FUNC__ "MatDestroy_MPIAIJ" 727 int MatDestroy_MPIAIJ(PetscObject obj) 728 { 729 Mat mat = (Mat) obj; 730 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 731 int ierr; 732 733 PetscFunctionBegin; 734 #if defined(USE_PETSC_LOG) 735 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 736 #endif 737 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 738 PetscFree(aij->rowners); 739 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 740 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 741 if (aij->colmap) PetscFree(aij->colmap); 742 if (aij->garray) PetscFree(aij->garray); 743 if (aij->lvec) VecDestroy(aij->lvec); 744 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 
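  /*
     colmap, garray, lvec, Mvctx and rowvalues are created only when needed
     (see CreateColmap_MPIAIJ_Private(), MatSetUpMultiply_MPIAIJ() and
     MatGetRow_MPIAIJ()), so any of them may still be null here; hence the
     guards around these frees.
  */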
745 if (aij->rowvalues) PetscFree(aij->rowvalues); 746 PetscFree(aij); 747 PLogObjectDestroy(mat); 748 PetscHeaderDestroy(mat); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNC__ 753 #define __FUNC__ "MatView_MPIAIJ_Binary" 754 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 757 int ierr; 758 759 PetscFunctionBegin; 760 if (aij->size == 1) { 761 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 762 } 763 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNC__ 768 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 769 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 770 { 771 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 772 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 773 int ierr, format,shift = C->indexshift,rank; 774 FILE *fd; 775 ViewerType vtype; 776 777 PetscFunctionBegin; 778 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 779 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 780 ierr = ViewerGetFormat(viewer,&format); 781 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 782 MatInfo info; 783 int flg; 784 MPI_Comm_rank(mat->comm,&rank); 785 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 786 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 787 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 788 PetscSequentialPhaseBegin(mat->comm,1); 789 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 790 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 791 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 792 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 793 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 794 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 795 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 796 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 797 fflush(fd); 798 PetscSequentialPhaseEnd(mat->comm,1); 799 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 802 PetscFunctionReturn(0); 803 } 804 } 805 806 if (vtype == DRAW_VIEWER) { 807 Draw draw; 808 PetscTruth isnull; 809 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 810 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 811 } 812 813 if (vtype == ASCII_FILE_VIEWER) { 814 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 815 PetscSequentialPhaseBegin(mat->comm,1); 816 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 817 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 818 aij->cend); 819 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 820 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 821 fflush(fd); 822 PetscSequentialPhaseEnd(mat->comm,1); 823 } else { 824 int size = aij->size; 825 rank = aij->rank; 826 if (size == 1) { 827 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 828 } else { 829 /* assemble the entire matrix onto first processor. 
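         For more than one processor the whole matrix is copied, with
         MatSetValues(), into a temporary MPIAIJ matrix that has all M rows
         on processor 0 and none elsewhere; after assembly, the local
         (sequential) part of that matrix is what is actually viewed.  Every
         nonzero therefore ends up on processor 0, so this path is only
         suitable for relatively small matrices.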
      */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   This has to provide several versions.

    2) a) use only local smoothing, updating outer values only once,
       b) local smoothing, updating outer values each inner iteration,
    3) coloring, updating outer values between colors.
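   The local sweeps implemented below apply the standard SOR update to the
   locally owned rows, reading ghost values of x from lvec.  Writing
   d = fshift + a_ii, and assuming SPARSEDENSEMDOT() subtracts the sparse
   row/vector dot product from its first argument (which is how it is used
   here), each update has the form

       x_i <- (1 - omega) x_i + omega * (b_i - sum_{j != i} a_ij x_j) / d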
906 */ 907 #undef __FUNC__ 908 #define __FUNC__ "MatRelax_MPIAIJ" 909 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 910 double fshift,int its,Vec xx) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 913 Mat AA = mat->A, BB = mat->B; 914 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 915 Scalar *b,*x,*xs,*ls,d,*v,sum; 916 int ierr,*idx, *diag; 917 int n = mat->n, m = mat->m, i,shift = A->indexshift; 918 919 PetscFunctionBegin; 920 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 921 xs = x + shift; /* shift by one for index start of 1 */ 922 ls = ls + shift; 923 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 924 diag = A->diag; 925 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 926 if (flag & SOR_ZERO_INITIAL_GUESS) { 927 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 931 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 932 while (its--) { 933 /* go down through the rows */ 934 for ( i=0; i<m; i++ ) { 935 n = A->i[i+1] - A->i[i]; 936 idx = A->j + A->i[i] + shift; 937 v = A->a + A->i[i] + shift; 938 sum = b[i]; 939 SPARSEDENSEMDOT(sum,xs,v,idx,n); 940 d = fshift + A->a[diag[i]+shift]; 941 n = B->i[i+1] - B->i[i]; 942 idx = B->j + B->i[i] + shift; 943 v = B->a + B->i[i] + shift; 944 SPARSEDENSEMDOT(sum,ls,v,idx,n); 945 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 946 } 947 /* come up through the rows */ 948 for ( i=m-1; i>-1; i-- ) { 949 n = A->i[i+1] - A->i[i]; 950 idx = A->j + A->i[i] + shift; 951 v = A->a + A->i[i] + shift; 952 sum = b[i]; 953 SPARSEDENSEMDOT(sum,xs,v,idx,n); 954 d = fshift + A->a[diag[i]+shift]; 955 n = B->i[i+1] - B->i[i]; 956 idx = B->j + B->i[i] + shift; 957 v = B->a + B->i[i] + shift; 958 SPARSEDENSEMDOT(sum,ls,v,idx,n); 959 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 960 } 961 } 962 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 963 if (flag & SOR_ZERO_INITIAL_GUESS) { 964 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 968 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 969 while (its--) { 970 for ( i=0; i<m; i++ ) { 971 n = A->i[i+1] - A->i[i]; 972 idx = A->j + A->i[i] + shift; 973 v = A->a + A->i[i] + shift; 974 sum = b[i]; 975 SPARSEDENSEMDOT(sum,xs,v,idx,n); 976 d = fshift + A->a[diag[i]+shift]; 977 n = B->i[i+1] - B->i[i]; 978 idx = B->j + B->i[i] + shift; 979 v = B->a + B->i[i] + shift; 980 SPARSEDENSEMDOT(sum,ls,v,idx,n); 981 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 982 } 983 } 984 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 990 mat->Mvctx); CHKERRQ(ierr); 991 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 992 mat->Mvctx); CHKERRQ(ierr); 993 while (its--) { 994 for ( i=m-1; i>-1; i-- ) { 995 n = A->i[i+1] - A->i[i]; 996 idx = A->j + A->i[i] + shift; 997 v = A->a + A->i[i] + shift; 998 sum = b[i]; 999 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1000 d = fshift + A->a[diag[i]+shift]; 1001 n = B->i[i+1] - B->i[i]; 1002 idx = B->j + B->i[i] + shift; 1003 v = B->a + B->i[i] + shift; 1004 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1005 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1006 } 1007 } 1008 } else { 1009 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNC__ 1015 #define __FUNC__ "MatGetInfo_MPIAIJ" 1016 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1019 Mat A = mat->A, B = mat->B; 1020 int ierr; 1021 double isend[5], irecv[5]; 1022 1023 PetscFunctionBegin; 1024 info->block_size = 1.0; 1025 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1026 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1027 isend[3] = info->memory; isend[4] = info->mallocs; 1028 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1029 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1030 isend[3] += info->memory; isend[4] += info->mallocs; 1031 if (flag == MAT_LOCAL) { 1032 info->nz_used = isend[0]; 1033 info->nz_allocated = isend[1]; 1034 info->nz_unneeded = isend[2]; 1035 info->memory = isend[3]; 1036 info->mallocs = isend[4]; 1037 } else if (flag == MAT_GLOBAL_MAX) { 1038 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1039 info->nz_used = irecv[0]; 1040 info->nz_allocated = irecv[1]; 1041 info->nz_unneeded = irecv[2]; 1042 info->memory = irecv[3]; 1043 info->mallocs = irecv[4]; 1044 } else if (flag == MAT_GLOBAL_SUM) { 1045 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1046 info->nz_used = irecv[0]; 1047 info->nz_allocated = irecv[1]; 1048 info->nz_unneeded = irecv[2]; 1049 info->memory = irecv[3]; 1050 info->mallocs = irecv[4]; 1051 } 1052 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1053 info->fill_ratio_needed = 0; 1054 info->factor_mallocs = 0; 1055 info->rows_global = (double)mat->M; 1056 info->columns_global = (double)mat->N; 1057 info->rows_local = (double)mat->m; 1058 info->columns_local = (double)mat->N; 1059 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNC__ 1064 #define __FUNC__ "MatSetOption_MPIAIJ" 1065 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1066 { 1067 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1068 1069 PetscFunctionBegin; 1070 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1071 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1072 op == MAT_COLUMNS_UNSORTED || 1073 op == MAT_COLUMNS_SORTED || 1074 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1075 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1076 MatSetOption(a->A,op); 1077 MatSetOption(a->B,op); 1078 } else if (op == MAT_ROW_ORIENTED) { 1079 a->roworiented = 1; 1080 MatSetOption(a->A,op); 1081 
MatSetOption(a->B,op); 1082 } else if (op == MAT_ROWS_SORTED || 1083 op == MAT_ROWS_UNSORTED || 1084 op == MAT_SYMMETRIC || 1085 op == MAT_STRUCTURALLY_SYMMETRIC || 1086 op == MAT_YES_NEW_DIAGONALS) 1087 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1088 else if (op == MAT_COLUMN_ORIENTED) { 1089 a->roworiented = 0; 1090 MatSetOption(a->A,op); 1091 MatSetOption(a->B,op); 1092 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1093 a->donotstash = 1; 1094 } else if (op == MAT_NO_NEW_DIAGONALS){ 1095 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1096 } else { 1097 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNC__ 1103 #define __FUNC__ "MatGetSize_MPIAIJ" 1104 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1105 { 1106 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1107 1108 PetscFunctionBegin; 1109 if (m) *m = mat->M; 1110 if (n) *n = mat->N; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1116 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1117 { 1118 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1119 1120 PetscFunctionBegin; 1121 if (m) *m = mat->m; 1122 if (n) *n = mat->N; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNC__ 1127 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1128 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1129 { 1130 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1131 1132 PetscFunctionBegin; 1133 *m = mat->rstart; *n = mat->rend; 1134 PetscFunctionReturn(0); 1135 } 1136 1137 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1138 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1139 1140 #undef __FUNC__ 1141 #define __FUNC__ "MatGetRow_MPIAIJ" 1142 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1143 { 1144 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1145 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1146 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1147 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1148 int *cmap, *idx_p; 1149 1150 PetscFunctionBegin; 1151 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1152 mat->getrowactive = PETSC_TRUE; 1153 1154 if (!mat->rowvalues && (idx || v)) { 1155 /* 1156 allocate enough space to hold information from the longest row. 
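        A single block of max*(sizeof(int)+sizeof(Scalar)) bytes is used:
        rowvalues holds the merged numerical values and rowindices (aliased
        into the same allocation) holds the corresponding global column
        indices, where max is the largest combined row length of the
        diagonal (A) and off-diagonal (B) parts.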
1157 */ 1158 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1159 int max = 1,m = mat->m,tmp; 1160 for ( i=0; i<m; i++ ) { 1161 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1162 if (max < tmp) { max = tmp; } 1163 } 1164 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1165 CHKPTRQ(mat->rowvalues); 1166 mat->rowindices = (int *) (mat->rowvalues + max); 1167 } 1168 1169 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1170 lrow = row - rstart; 1171 1172 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1173 if (!v) {pvA = 0; pvB = 0;} 1174 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1175 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1176 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1177 nztot = nzA + nzB; 1178 1179 cmap = mat->garray; 1180 if (v || idx) { 1181 if (nztot) { 1182 /* Sort by increasing column numbers, assuming A and B already sorted */ 1183 int imark = -1; 1184 if (v) { 1185 *v = v_p = mat->rowvalues; 1186 for ( i=0; i<nzB; i++ ) { 1187 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1188 else break; 1189 } 1190 imark = i; 1191 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1192 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1193 } 1194 if (idx) { 1195 *idx = idx_p = mat->rowindices; 1196 if (imark > -1) { 1197 for ( i=0; i<imark; i++ ) { 1198 idx_p[i] = cmap[cworkB[i]]; 1199 } 1200 } else { 1201 for ( i=0; i<nzB; i++ ) { 1202 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1203 else break; 1204 } 1205 imark = i; 1206 } 1207 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1208 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1209 } 1210 } 1211 else { 1212 if (idx) *idx = 0; 1213 if (v) *v = 0; 1214 } 1215 } 1216 *nz = nztot; 1217 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1218 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1224 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1225 { 1226 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1227 1228 PetscFunctionBegin; 1229 if (aij->getrowactive == PETSC_FALSE) { 1230 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1231 } 1232 aij->getrowactive = PETSC_FALSE; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNC__ 1237 #define __FUNC__ "MatNorm_MPIAIJ" 1238 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1241 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1242 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1243 double sum = 0.0; 1244 Scalar *v; 1245 1246 PetscFunctionBegin; 1247 if (aij->size == 1) { 1248 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1249 } else { 1250 if (type == NORM_FROBENIUS) { 1251 v = amat->a; 1252 for (i=0; i<amat->nz; i++ ) { 1253 #if defined(USE_PETSC_COMPLEX) 1254 sum += real(conj(*v)*(*v)); v++; 1255 #else 1256 sum += (*v)*(*v); v++; 1257 #endif 1258 } 1259 v = bmat->a; 1260 for (i=0; i<bmat->nz; i++ ) { 1261 #if defined(USE_PETSC_COMPLEX) 1262 sum += real(conj(*v)*(*v)); v++; 1263 #else 1264 sum += (*v)*(*v); v++; 1265 #endif 1266 } 1267 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1268 *norm = sqrt(*norm); 1269 } else if (type == NORM_1) { /* max column 
norm */ 1270 double *tmp, *tmp2; 1271 int *jj, *garray = aij->garray; 1272 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1273 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1274 PetscMemzero(tmp,aij->N*sizeof(double)); 1275 *norm = 0.0; 1276 v = amat->a; jj = amat->j; 1277 for ( j=0; j<amat->nz; j++ ) { 1278 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1279 } 1280 v = bmat->a; jj = bmat->j; 1281 for ( j=0; j<bmat->nz; j++ ) { 1282 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1283 } 1284 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1285 for ( j=0; j<aij->N; j++ ) { 1286 if (tmp2[j] > *norm) *norm = tmp2[j]; 1287 } 1288 PetscFree(tmp); PetscFree(tmp2); 1289 } else if (type == NORM_INFINITY) { /* max row norm */ 1290 double ntemp = 0.0; 1291 for ( j=0; j<amat->m; j++ ) { 1292 v = amat->a + amat->i[j] + shift; 1293 sum = 0.0; 1294 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1295 sum += PetscAbsScalar(*v); v++; 1296 } 1297 v = bmat->a + bmat->i[j] + shift; 1298 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1299 sum += PetscAbsScalar(*v); v++; 1300 } 1301 if (sum > ntemp) ntemp = sum; 1302 } 1303 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1304 } else { 1305 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1306 } 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNC__ 1312 #define __FUNC__ "MatTranspose_MPIAIJ" 1313 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1314 { 1315 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1316 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1317 int ierr,shift = Aloc->indexshift; 1318 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1319 Mat B; 1320 Scalar *array; 1321 1322 PetscFunctionBegin; 1323 if (matout == PETSC_NULL && M != N) { 1324 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1325 } 1326 1327 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1328 1329 /* copy over the A part */ 1330 Aloc = (Mat_SeqAIJ*) a->A->data; 1331 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1332 row = a->rstart; 1333 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1334 for ( i=0; i<m; i++ ) { 1335 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1336 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1337 } 1338 aj = Aloc->j; 1339 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1340 1341 /* copy over the B part */ 1342 Aloc = (Mat_SeqAIJ*) a->B->data; 1343 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1344 row = a->rstart; 1345 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1346 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1347 for ( i=0; i<m; i++ ) { 1348 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1349 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1350 } 1351 PetscFree(ct); 1352 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1353 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1354 if (matout != PETSC_NULL) { 1355 *matout = B; 1356 } else { 1357 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1358 PetscFree(a->rowners); 1359 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1360 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1361 if (a->colmap) PetscFree(a->colmap); 1362 if (a->garray) PetscFree(a->garray); 1363 if (a->lvec) VecDestroy(a->lvec); 1364 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1365 PetscFree(a); 1366 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1367 PetscHeaderDestroy(B); 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNC__ 1373 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1374 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1375 { 1376 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1377 Mat a = aij->A, b = aij->B; 1378 int ierr,s1,s2,s3; 1379 1380 PetscFunctionBegin; 1381 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1382 if (rr) { 1383 s3 = aij->n; 1384 VecGetLocalSize_Fast(rr,s1); 1385 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1386 /* Overlap communication with computation. */ 1387 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1388 } 1389 if (ll) { 1390 VecGetLocalSize_Fast(ll,s1); 1391 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1392 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1393 } 1394 /* scale the diagonal block */ 1395 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1396 1397 if (rr) { 1398 /* Do a scatter end and then right scale the off-diagonal block */ 1399 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1400 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1401 } 1402 1403 PetscFunctionReturn(0); 1404 } 1405 1406 1407 extern int MatPrintHelp_SeqAIJ(Mat); 1408 #undef __FUNC__ 1409 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1410 int MatPrintHelp_MPIAIJ(Mat A) 1411 { 1412 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1413 int ierr; 1414 1415 PetscFunctionBegin; 1416 if (!a->rank) { 1417 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNC__ 1423 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1424 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1425 { 1426 PetscFunctionBegin; 1427 *bs = 1; 1428 PetscFunctionReturn(0); 1429 } 1430 #undef __FUNC__ 1431 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1432 int MatSetUnfactored_MPIAIJ(Mat A) 1433 { 1434 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1435 int ierr; 1436 1437 PetscFunctionBegin; 1438 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNC__ 1443 #define __FUNC__ "MatEqual_MPIAIJ" 1444 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1445 { 1446 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1447 Mat a, b, c, d; 1448 PetscTruth flg; 1449 int ierr; 1450 1451 PetscFunctionBegin; 1452 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1453 a = matA->A; b = matA->B; 1454 c = matB->A; d = matB->B; 1455 1456 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1457 if (flg == PETSC_TRUE) { 1458 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1459 } 1460 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1465 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1466 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1467 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS 
                                      *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};


#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each
           row) or PETSC_NULL.  You must leave room for the diagonal entry even
           if it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
1542 We search for consecutive rows with the same nonzero structure, thereby 1543 reusing matrix information to achieve increased efficiency. 1544 1545 Options Database Keys: 1546 $ -mat_aij_no_inode - Do not use inodes 1547 $ -mat_aij_inode_limit <limit> - Set inode limit. 1548 $ (max limit=5) 1549 $ -mat_aij_oneindex - Internally use indexing starting at 1 1550 $ rather than 0. Note: When calling MatSetValues(), 1551 $ the user still MUST index entries starting at 0! 1552 1553 Storage Information: 1554 For a square global matrix we define each processor's diagonal portion 1555 to be its local rows and the corresponding columns (a square submatrix); 1556 each processor's off-diagonal portion encompasses the remainder of the 1557 local matrix (a rectangular submatrix). 1558 1559 The user can specify preallocated storage for the diagonal part of 1560 the local submatrix with either d_nz or d_nnz (not both). Set 1561 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1562 memory allocation. Likewise, specify preallocated storage for the 1563 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1564 1565 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1566 the figure below we depict these three local rows and all columns (0-11). 1567 1568 $ 0 1 2 3 4 5 6 7 8 9 10 11 1569 $ ------------------- 1570 $ row 3 | o o o d d d o o o o o o 1571 $ row 4 | o o o d d d o o o o o o 1572 $ row 5 | o o o d d d o o o o o o 1573 $ ------------------- 1574 $ 1575 1576 Thus, any entries in the d locations are stored in the d (diagonal) 1577 submatrix, and any entries in the o locations are stored in the 1578 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1579 stored simply in the MATSEQAIJ format for compressed row storage. 1580 1581 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1582 and o_nz should indicate the number of nonzeros per row in the o matrix. 1583 In general, for PDE problems in which most nonzeros are near the diagonal, 1584 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1585 or you will get TERRIBLE performance; see the users' manual chapter on 1586 matrices. 
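   Example:
   A rough sketch of typical use on each processor; error checking with
   CHKERRQ() is omitted, and the local size and preallocation values below
   are purely illustrative.
$      Mat A;
$      int ierr, m = 100;
$      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,PETSC_DECIDE,PETSC_DECIDE,
$                             5,PETSC_NULL,2,PETSC_NULL,&A);
$      ... set entries with MatSetValues(), then ...
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);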

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
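  /*
     Builds a new MPIAIJ matrix with the same parallel layout as matin:
     the sizes, ownership ranges, colmap and garray tables, the local work
     vector and scatter context, and the diagonal/off-diagonal SeqAIJ
     blocks are all duplicated from the original matrix.
  */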
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__  
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

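  /*
     From here the loader proceeds in three stages: (1) split the M rows
     across the processes and share the resulting ownership ranges,
     (2) read the row lengths and column indices on process 0 and scatter
     or ship them to their owners, and (3) read the numerical values,
     insert them with MatSetValues(), and assemble the matrix.
  */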
  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

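  /*
     Second pass over the file: process 0 reads the numerical values, keeps
     its own block, and ships each remaining block to its owner; every
     process then inserts its rows with MatSetValues() using the column
     indices received above.
  */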
  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* determine buffer space needed for message */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

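/*
   A typical calling sequence for the loader above (an illustrative sketch,
   not code from this file; "matrix.dat" is a placeholder file name and the
   binary-viewer creation routine may be named differently in other PETSc
   releases):

      Viewer viewer;
      Mat    A;
      int    ierr;

      ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer);CHKERRQ(ierr);

   When the MATMPIAIJ type is requested, MatLoad() dispatches to
   MatLoad_MPIAIJ(), so the returned matrix is already assembled and
   distributed by rows as described above.
*/
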
#undef __FUNC__  
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first each process
    extracts its piece as a SeqAIJ matrix in local, and then the final result
    is built by concatenating the local matrices.  Writing it directly would
    be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);

  /*
      m      - number of local rows
      n      - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (*local)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    nlocal = n/size + ((n % size) > rank);
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ *) (*local)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;
  PetscFunctionReturn(0);
}
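
/*
   A sketch of how the routine above is typically reached (illustrative
   only; it assumes the generic MatGetSubMatrix() wrapper takes the same
   argument list as MatGetSubMatrix_MPIAIJ(), and the index sets isrow and
   iscol are placeholders created elsewhere):

      Mat sub;
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

   Passing MAT_INITIAL_MATRIX allocates and preallocates a new parallel
   matrix; any other call value reuses the matrix passed in through newmat,
   which must keep the size and layout of the original request.
*/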