#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.250 1998/06/19 15:53:31 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
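    /* the new entry is now in place; fall through to the common exit label */ \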
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
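  /*
     Each global row index im[i] is tested against the local ownership range
     [rstart,rend).  Entries in locally owned rows go directly into the diagonal
     block A (columns in [cstart,cend)) or the off-diagonal block B; entries for
     rows owned by other processors are stashed and communicated during
     MatAssemblyBegin()/MatAssemblyEnd().
  */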
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (aij->donotstash) {
    aij->svalues    = 0; aij->rvalues    = 0;
    aij->nsends     = 0; aij->nrecvs     = 0;
    aij->send_waits = 0; aij->recv_waits = 0;
    aij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of (global row index, global column index, value)
          triples; the indices are stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);
  PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}

extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;

  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  if (aij->recv_waits) PetscFree(aij->recv_waits);
  if (aij->rvalues)    PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (aij->send_waits) PetscFree(aij->send_waits);
  if (aij->svalues)    PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal, allocating
     new memory, thus calling lots of mallocs and slowing things down.
     Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
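      /*
         A new parallel matrix A holding all M rows on process 0 is created below;
         each process copies its diagonal and off-diagonal entries into it with
         MatSetValues(), the matrix is assembled, and then only process 0 (or every
         process, for a draw viewer) views the assembled sequential part.
      */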
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->N;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
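    /* propagate the row-oriented option to both the diagonal (A) and off-diagonal (B) blocks */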
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but we do free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

extern int MatPrintHelp_SeqAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat, int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int, IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1598 1599 The AIJ format (also called the Yale sparse matrix format or 1600 compressed row storage), is fully compatible with standard Fortran 77 1601 storage. That is, the stored row and column indices can begin at 1602 either one (as in Fortran) or zero. See the users manual for details. 1603 1604 The user MUST specify either the local or global matrix dimensions 1605 (possibly both). 1606 1607 By default, this format uses inodes (identical nodes) when possible. 1608 We search for consecutive rows with the same nonzero structure, thereby 1609 reusing matrix information to achieve increased efficiency. 1610 1611 Options Database Keys: 1612 + -mat_aij_no_inode - Do not use inodes 1613 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1614 - -mat_aij_oneindex - Internally use indexing starting at 1 1615 rather than 0. Note that when calling MatSetValues(), 1616 the user still MUST index entries starting at 0! 1617 1618 Storage Information: 1619 For a square global matrix we define each processor's diagonal portion 1620 to be its local rows and the corresponding columns (a square submatrix); 1621 each processor's off-diagonal portion encompasses the remainder of the 1622 local matrix (a rectangular submatrix). 1623 1624 The user can specify preallocated storage for the diagonal part of 1625 the local submatrix with either d_nz or d_nnz (not both). Set 1626 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1627 memory allocation. Likewise, specify preallocated storage for the 1628 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1629 1630 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1631 the figure below we depict these three local rows and all columns (0-11). 1632 1633 .vb 1634 0 1 2 3 4 5 6 7 8 9 10 11 1635 ------------------- 1636 row 3 | o o o d d d o o o o o o 1637 row 4 | o o o d d d o o o o o o 1638 row 5 | o o o d d d o o o o o o 1639 ------------------- 1640 .ve 1641 1642 Thus, any entries in the d locations are stored in the d (diagonal) 1643 submatrix, and any entries in the o locations are stored in the 1644 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1645 stored simply in the MATSEQAIJ format for compressed row storage. 1646 1647 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1648 and o_nz should indicate the number of nonzeros per row in the o matrix. 1649 In general, for PDE problems in which most nonzeros are near the diagonal, 1650 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1651 or you will get TERRIBLE performance; see the users' manual chapter on 1652 matrices. 
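
   Example - an illustrative sketch only; the preallocation values 5 and 2
   below are arbitrary and should be chosen to match the actual nonzero
   pattern (or replaced by d_nnz/o_nnz arrays giving per-row counts):

.vb
      Mat A;
      int ierr, m = 8, n = 8;   /* local rows and columns on this processor */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
.ve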
1653 1654 .keywords: matrix, aij, compressed row, sparse, parallel 1655 1656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1657 @*/ 1658 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1659 { 1660 Mat B; 1661 Mat_MPIAIJ *b; 1662 int ierr, i,sum[2],work[2],size; 1663 1664 PetscFunctionBegin; 1665 MPI_Comm_size(comm,&size); 1666 if (size == 1) { 1667 if (M == PETSC_DECIDE) M = m; 1668 if (N == PETSC_DECIDE) N = n; 1669 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 *A = 0; 1674 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1675 PLogObjectCreate(B); 1676 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1677 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1678 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1679 B->ops->destroy = MatDestroy_MPIAIJ; 1680 B->ops->view = MatView_MPIAIJ; 1681 B->factor = 0; 1682 B->assembled = PETSC_FALSE; 1683 B->mapping = 0; 1684 1685 B->insertmode = NOT_SET_VALUES; 1686 b->size = size; 1687 MPI_Comm_rank(comm,&b->rank); 1688 1689 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1690 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1691 } 1692 1693 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1694 work[0] = m; work[1] = n; 1695 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1696 if (M == PETSC_DECIDE) M = sum[0]; 1697 if (N == PETSC_DECIDE) N = sum[1]; 1698 } 1699 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1700 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1701 b->m = m; B->m = m; 1702 b->n = n; B->n = n; 1703 b->N = N; B->N = N; 1704 b->M = M; B->M = M; 1705 1706 /* build local table of row and column ownerships */ 1707 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1708 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1709 b->cowners = b->rowners + b->size + 2; 1710 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1711 b->rowners[0] = 0; 1712 for ( i=2; i<=b->size; i++ ) { 1713 b->rowners[i] += b->rowners[i-1]; 1714 } 1715 b->rstart = b->rowners[b->rank]; 1716 b->rend = b->rowners[b->rank+1]; 1717 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1718 b->cowners[0] = 0; 1719 for ( i=2; i<=b->size; i++ ) { 1720 b->cowners[i] += b->cowners[i-1]; 1721 } 1722 b->cstart = b->cowners[b->rank]; 1723 b->cend = b->cowners[b->rank+1]; 1724 1725 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1726 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1727 PLogObjectParent(B,b->A); 1728 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1729 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1730 PLogObjectParent(B,b->B); 1731 1732 /* build cache for off array entries formed */ 1733 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1734 b->donotstash = 0; 1735 b->colmap = 0; 1736 b->garray = 0; 1737 b->roworiented = 1; 1738 1739 /* stuff used for matrix vector multiply */ 1740 b->lvec = 0; 1741 b->Mvctx = 0; 1742 1743 /* stuff for MatGetRow() */ 1744 b->rowindices = 0; 1745 b->rowvalues = 0; 1746 b->getrowactive = PETSC_FALSE; 1747 1748 *A = B; 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #undef __FUNC__ 1753 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1754 int MatConvertSameType_MPIAIJ(Mat matin,Mat 
*newmat,int cpvalues) 1755 { 1756 Mat mat; 1757 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1758 int ierr, len=0, flg; 1759 1760 PetscFunctionBegin; 1761 *newmat = 0; 1762 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1763 PLogObjectCreate(mat); 1764 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1765 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 1766 mat->ops->destroy = MatDestroy_MPIAIJ; 1767 mat->ops->view = MatView_MPIAIJ; 1768 mat->factor = matin->factor; 1769 mat->assembled = PETSC_TRUE; 1770 1771 a->m = mat->m = oldmat->m; 1772 a->n = mat->n = oldmat->n; 1773 a->M = mat->M = oldmat->M; 1774 a->N = mat->N = oldmat->N; 1775 1776 a->rstart = oldmat->rstart; 1777 a->rend = oldmat->rend; 1778 a->cstart = oldmat->cstart; 1779 a->cend = oldmat->cend; 1780 a->size = oldmat->size; 1781 a->rank = oldmat->rank; 1782 mat->insertmode = NOT_SET_VALUES; 1783 a->rowvalues = 0; 1784 a->getrowactive = PETSC_FALSE; 1785 1786 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1787 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1788 a->cowners = a->rowners + a->size + 2; 1789 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1790 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1791 if (oldmat->colmap) { 1792 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1793 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1794 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1795 } else a->colmap = 0; 1796 if (oldmat->garray) { 1797 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1798 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1799 PLogObjectMemory(mat,len*sizeof(int)); 1800 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1801 } else a->garray = 0; 1802 1803 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1804 PLogObjectParent(mat,a->lvec); 1805 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1806 PLogObjectParent(mat,a->Mvctx); 1807 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1808 PLogObjectParent(mat,a->A); 1809 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1810 PLogObjectParent(mat,a->B); 1811 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1812 if (flg) { 1813 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1814 } 1815 *newmat = mat; 1816 PetscFunctionReturn(0); 1817 } 1818 1819 #include "sys.h" 1820 1821 #undef __FUNC__ 1822 #define __FUNC__ "MatLoad_MPIAIJ" 1823 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1824 { 1825 Mat A; 1826 Scalar *vals,*svals; 1827 MPI_Comm comm = ((PetscObject)viewer)->comm; 1828 MPI_Status status; 1829 int i, nz, ierr, j,rstart, rend, fd; 1830 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1831 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1832 int tag = ((PetscObject)viewer)->tag; 1833 1834 PetscFunctionBegin; 1835 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1836 if (!rank) { 1837 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1838 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1839 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1840 if (header[3] < 0) { 1841 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1842 } 1843 } 1844 1845 ierr = 
MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1846 M = header[1]; N = header[2]; 1847 /* determine ownership of all rows */ 1848 m = M/size + ((M % size) > rank); 1849 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1850 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1851 rowners[0] = 0; 1852 for ( i=2; i<=size; i++ ) { 1853 rowners[i] += rowners[i-1]; 1854 } 1855 rstart = rowners[rank]; 1856 rend = rowners[rank+1]; 1857 1858 /* distribute row lengths to all processors */ 1859 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1860 offlens = ourlens + (rend-rstart); 1861 if (!rank) { 1862 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1863 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1864 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1865 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1866 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1867 PetscFree(sndcounts); 1868 } else { 1869 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1870 } 1871 1872 if (!rank) { 1873 /* calculate the number of nonzeros on each processor */ 1874 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1875 PetscMemzero(procsnz,size*sizeof(int)); 1876 for ( i=0; i<size; i++ ) { 1877 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1878 procsnz[i] += rowlengths[j]; 1879 } 1880 } 1881 PetscFree(rowlengths); 1882 1883 /* determine max buffer needed and allocate it */ 1884 maxnz = 0; 1885 for ( i=0; i<size; i++ ) { 1886 maxnz = PetscMax(maxnz,procsnz[i]); 1887 } 1888 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1889 1890 /* read in my part of the matrix column indices */ 1891 nz = procsnz[0]; 1892 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1893 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1894 1895 /* read in every one elses and ship off */ 1896 for ( i=1; i<size; i++ ) { 1897 nz = procsnz[i]; 1898 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1899 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1900 } 1901 PetscFree(cols); 1902 } else { 1903 /* determine buffer space needed for message */ 1904 nz = 0; 1905 for ( i=0; i<m; i++ ) { 1906 nz += ourlens[i]; 1907 } 1908 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1909 1910 /* receive message of column indices*/ 1911 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1912 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1913 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1914 } 1915 1916 /* loop over local rows, determining number of off diagonal entries */ 1917 PetscMemzero(offlens,m*sizeof(int)); 1918 jj = 0; 1919 for ( i=0; i<m; i++ ) { 1920 for ( j=0; j<ourlens[i]; j++ ) { 1921 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1922 jj++; 1923 } 1924 } 1925 1926 /* create our matrix */ 1927 for ( i=0; i<m; i++ ) { 1928 ourlens[i] -= offlens[i]; 1929 } 1930 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1931 A = *newmat; 1932 MatSetOption(A,MAT_COLUMNS_SORTED); 1933 for ( i=0; i<m; i++ ) { 1934 ourlens[i] += offlens[i]; 1935 } 1936 1937 if (!rank) { 1938 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1939 1940 /* read in my part of the matrix 
numerical values */ 1941 nz = procsnz[0]; 1942 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1943 1944 /* insert into matrix */ 1945 jj = rstart; 1946 smycols = mycols; 1947 svals = vals; 1948 for ( i=0; i<m; i++ ) { 1949 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1950 smycols += ourlens[i]; 1951 svals += ourlens[i]; 1952 jj++; 1953 } 1954 1955 /* read in the other processors' values and ship them out */ 1956 for ( i=1; i<size; i++ ) { 1957 nz = procsnz[i]; 1958 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1959 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1960 } 1961 PetscFree(procsnz); 1962 } else { 1963 /* receive numeric values */ 1964 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1965 1966 /* receive message of values */ 1967 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1968 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1969 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1970 1971 /* insert into matrix */ 1972 jj = rstart; 1973 smycols = mycols; 1974 svals = vals; 1975 for ( i=0; i<m; i++ ) { 1976 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1977 smycols += ourlens[i]; 1978 svals += ourlens[i]; 1979 jj++; 1980 } 1981 } 1982 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1983 1984 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1985 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNC__ 1990 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1991 /* 1992 Not great, since this makes two copies of the submatrix: first a sequential (SeqAIJ) copy in local storage, 1993 and then the final result by concatenating those local matrices.
1994 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1995 */ 1996 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 1997 { 1998 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1999 Mat *local,M, Mreuse; 2000 Scalar *vwork,*aa; 2001 MPI_Comm comm = mat->comm; 2002 Mat_SeqAIJ *aij; 2003 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2004 2005 PetscFunctionBegin; 2006 MPI_Comm_rank(comm,&rank); 2007 MPI_Comm_size(comm,&size); 2008 2009 if (call == MAT_REUSE_MATRIX) { 2010 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2011 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2012 local = &Mreuse; 2013 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2014 } else { 2015 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2016 Mreuse = *local; 2017 PetscFree(local); 2018 } 2019 2020 /* 2021 m - number of local rows 2022 n - number of columns (same on all processors) 2023 rstart - first row in new global matrix generated 2024 */ 2025 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2026 if (call == MAT_INITIAL_MATRIX) { 2027 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2028 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2029 ii = aij->i; 2030 jj = aij->j; 2031 2032 /* 2033 Determine the number of non-zeros in the diagonal and off-diagonal 2034 portions of the matrix in order to do correct preallocation 2035 */ 2036 2037 /* first get start and end of "diagonal" columns */ 2038 if (csize == PETSC_DECIDE) { 2039 nlocal = n/size + ((n % size) > rank); 2040 } else { 2041 nlocal = csize; 2042 } 2043 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2044 rstart = rend - nlocal; 2045 if (rank == size - 1 && rend != n) { 2046 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2047 } 2048 2049 /* next, compute all the lengths */ 2050 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2051 olens = dlens + m; 2052 for ( i=0; i<m; i++ ) { 2053 jend = ii[i+1] - ii[i]; 2054 olen = 0; 2055 dlen = 0; 2056 for ( j=0; j<jend; j++ ) { 2057 if ( *jj < rstart || *jj >= rend) olen++; 2058 else dlen++; 2059 jj++; 2060 } 2061 olens[i] = olen; 2062 dlens[i] = dlen; 2063 } 2064 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2065 PetscFree(dlens); 2066 } else { 2067 int ml,nl; 2068 2069 M = *newmat; 2070 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2071 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2072 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2073 /* 2074 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2075 rather than the slower MatSetValues(). 
2076 */ 2077 M->was_assembled = PETSC_TRUE; 2078 M->assembled = PETSC_FALSE; 2079 } 2080 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2081 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2082 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2083 ii = aij->i; 2084 jj = aij->j; 2085 aa = aij->a; 2086 for (i=0; i<m; i++) { 2087 row = rstart + i; 2088 nz = ii[i+1] - ii[i]; 2089 cwork = jj; jj += nz; 2090 vwork = aa; aa += nz; 2091 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2092 } 2093 2094 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2096 *newmat = M; 2097 2098 /* save submatrix used in processor for next request */ 2099 if (call == MAT_INITIAL_MATRIX) { 2100 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2101 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2102 } 2103 2104 PetscFunctionReturn(0); 2105 } 2106
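
/*
    Illustrative calling sequence for the reuse path in MatGetSubMatrix_MPIAIJ()
    (a sketch only; isrow and iscol are index sets created by the caller).
    The first call, with MAT_INITIAL_MATRIX, caches the intermediate sequential
    submatrix on the result via PetscObjectCompose() under the name "SubMatrix";
    later calls pass the same output matrix back with MAT_REUSE_MATRIX so the
    cached copy is refilled rather than rebuilt:

       Mat sub;
       ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
       ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub); CHKERRQ(ierr);
*/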