1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.294 1999/05/12 03:29:11 bsmith Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for ( i=0; i<n; i++ ){ 33 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for ( _i=0; _i<nrow; _i++ ) { \ 
59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 78 new_j = (int *) (new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 83 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 PetscFree(a->a); \ 94 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 95 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 96 a->singlemalloc = 1; \ 97 \ 98 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 99 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 100 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 101 a->maxnz += CHUNKSIZE; \ 102 a->reallocs++; \ 103 } \ 104 N = nrow++ - 1; a->nz++; \ 105 /* shift up all the later entries in this row */ \ 106 
for ( ii=N; ii>=_i; ii-- ) { \ 107 rp[ii+1] = rp[ii]; \ 108 ap[ii+1] = ap[ii]; \ 109 } \ 110 rp[_i] = col1; \ 111 ap[_i] = value; \ 112 a_noinsert: ; \ 113 ailen[row] = nrow; \ 114 } 115 116 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 117 { \ 118 \ 119 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 120 rmax = bimax[row]; nrow = bilen[row]; \ 121 col1 = col - shift; \ 122 \ 123 low = 0; high = nrow; \ 124 while (high-low > 5) { \ 125 t = (low+high)/2; \ 126 if (rp[t] > col) high = t; \ 127 else low = t; \ 128 } \ 129 for ( _i=0; _i<nrow; _i++ ) { \ 130 if (rp[_i] > col1) break; \ 131 if (rp[_i] == col1) { \ 132 if (addv == ADD_VALUES) ap[_i] += value; \ 133 else ap[_i] = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (nonew == 1) goto b_noinsert; \ 138 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 156 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 157 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 158 len*sizeof(int));CHKERRQ(ierr); \ 159 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 160 ierr = 
PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 161 len*sizeof(Scalar));CHKERRQ(ierr); \ 162 /* free up old matrix storage */ \ 163 \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 170 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 172 b->maxnz += CHUNKSIZE; \ 173 b->reallocs++; \ 174 } \ 175 N = nrow++ - 1; b->nz++; \ 176 /* shift up all the later entries in this row */ \ 177 for ( ii=N; ii>=_i; ii-- ) { \ 178 rp[ii+1] = rp[ii]; \ 179 ap[ii+1] = ap[ii]; \ 180 } \ 181 rp[_i] = col1; \ 182 ap[_i] = value; \ 183 b_noinsert: ; \ 184 bilen[row] = nrow; \ 185 } 186 187 #undef __FUNC__ 188 #define __FUNC__ "MatSetValues_MPIAIJ" 189 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 190 { 191 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 192 Scalar value; 193 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 194 int cstart = aij->cstart, cend = aij->cend,row,col; 195 int roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 200 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 201 Scalar *aa = a->a; 202 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 205 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 206 Scalar *ba = b->a; 207 208 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 209 int nonew = a->nonew,shift = a->indexshift; 210 Scalar *ap; 211 212 PetscFunctionBegin; 213 for ( i=0; i<m; i++ ) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_BOPT_g) 216 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for ( 
j=0; j<n; j++ ) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 225 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 226 } 227 else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_BOPT_g) 229 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 230 #endif 231 else { 232 if (mat->was_assembled) { 233 if (!aij->colmap) { 234 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 235 } 236 #if defined (PETSC_USE_CTABLE) 237 ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 238 col--; 239 #else 240 col = aij->colmap[in[j]] - 1; 241 #endif 242 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 243 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 244 col = in[j]; 245 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 246 B = aij->B; 247 b = (Mat_SeqAIJ *) B->data; 248 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 249 ba = b->a; 250 } 251 } else col = in[j]; 252 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 253 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 254 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 255 } 256 } 257 } else { 258 if (!aij->donotstash) { 259 if (roworiented) { 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNC__ 271 #define __FUNC__ "MatGetValues_MPIAIJ" 272 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 275 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 276 int cstart = aij->cstart, cend = aij->cend,row,col; 277 278 PetscFunctionBegin; 279 for ( i=0; i<m; i++ ) { 
280 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 281 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 282 if (idxm[i] >= rstart && idxm[i] < rend) { 283 row = idxm[i] - rstart; 284 for ( j=0; j<n; j++ ) { 285 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 286 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 287 if (idxn[j] >= cstart && idxn[j] < cend){ 288 col = idxn[j] - cstart; 289 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 290 } else { 291 if (!aij->colmap) { 292 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 293 } 294 #if defined (PETSC_USE_CTABLE) 295 ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 296 col --; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 318 int ierr,nstash,reallocs; 319 InsertMode addv; 320 321 PetscFunctionBegin; 322 if (aij->donotstash) { 323 PetscFunctionReturn(0); 324 } 325 326 /* make sure all processors are either in INSERTMODE or ADDMODE */ 327 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 328 if (addv == (ADD_VALUES|INSERT_VALUES)) { 329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 330 } 331 mat->insertmode = addv; /* in case this processor had no cache */ 332 333 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 334 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 335 
PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 336 PetscFunctionReturn(0); 337 } 338 339 340 #undef __FUNC__ 341 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 343 { 344 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 345 int i,j,rstart,ncols,n,ierr,flg; 346 int *row,*col,other_disassembled; 347 Scalar *val; 348 InsertMode addv = mat->insertmode; 349 350 PetscFunctionBegin; 351 if (!aij->donotstash) { 352 while (1) { 353 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 354 if (!flg) break; 355 356 for ( i=0; i<n; ) { 357 /* Now identify the consecutive vals belonging to the same row */ 358 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 359 if (j < n) ncols = j-i; 360 else ncols = n-i; 361 /* Now assemble all these values with a single function call */ 362 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 363 i = j; 364 } 365 } 366 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 367 } 368 369 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 370 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 371 372 /* determine if any processor has disassembled, if so we must 373 also disassemble ourselfs, in order that we may reassemble. 
*/ 374 /* 375 if nonzero structure of submatrix B cannot change then we know that 376 no processor disassembled thus we can skip this stuff 377 */ 378 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 379 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 380 if (mat->was_assembled && !other_disassembled) { 381 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 382 } 383 } 384 385 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 386 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 389 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 390 391 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 392 PetscFunctionReturn(0); 393 } 394 395 #undef __FUNC__ 396 #define __FUNC__ "MatZeroEntries_MPIAIJ" 397 int MatZeroEntries_MPIAIJ(Mat A) 398 { 399 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 400 int ierr; 401 402 PetscFunctionBegin; 403 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 404 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNC__ 409 #define __FUNC__ "MatZeroRows_MPIAIJ" 410 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 413 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 414 int *procs,*nprocs,j,found,idx,nsends,*work,row; 415 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 416 int *rvalues,tag = A->tag,count,base,slen,n,*source; 417 int *lens,imdex,*lrows,*values,rstart=l->rstart; 418 MPI_Comm comm = A->comm; 419 MPI_Request *send_waits,*recv_waits; 420 MPI_Status recv_status,*send_status; 421 IS istmp; 422 423 PetscFunctionBegin; 424 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 425 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 426 427 /* first count number of contributors to each processor */ 428 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 429 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 430 
procs = nprocs + size; 431 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 432 for ( i=0; i<N; i++ ) { 433 idx = rows[i]; 434 found = 0; 435 for ( j=0; j<size; j++ ) { 436 if (idx >= owners[j] && idx < owners[j+1]) { 437 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 438 } 439 } 440 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 441 } 442 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 443 444 /* inform other processors of number of messages and max length*/ 445 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 446 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 447 nrecvs = work[rank]; 448 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 449 nmax = work[rank]; 450 PetscFree(work); 451 452 /* post receives: */ 453 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 454 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 455 for ( i=0; i<nrecvs; i++ ) { 456 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 457 } 458 459 /* do sends: 460 1) starts[i] gives the starting index in svalues for stuff going to 461 the ith processor 462 */ 463 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 464 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 465 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 466 starts[0] = 0; 467 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 468 for ( i=0; i<N; i++ ) { 469 svalues[starts[owner[i]]++] = rows[i]; 470 } 471 ISRestoreIndices(is,&rows); 472 473 starts[0] = 0; 474 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 475 count = 0; 476 for ( i=0; i<size; i++ ) { 477 if (procs[i]) { 478 ierr = 
MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 479 } 480 } 481 PetscFree(starts); 482 483 base = owners[rank]; 484 485 /* wait on receives */ 486 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 487 source = lens + nrecvs; 488 count = nrecvs; slen = 0; 489 while (count) { 490 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 491 /* unpack receives into our local space */ 492 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 493 source[imdex] = recv_status.MPI_SOURCE; 494 lens[imdex] = n; 495 slen += n; 496 count--; 497 } 498 PetscFree(recv_waits); 499 500 /* move the data into the send scatter */ 501 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 502 count = 0; 503 for ( i=0; i<nrecvs; i++ ) { 504 values = rvalues + i*nmax; 505 for ( j=0; j<lens[i]; j++ ) { 506 lrows[count++] = values[j] - base; 507 } 508 } 509 PetscFree(rvalues); PetscFree(lens); 510 PetscFree(owner); PetscFree(nprocs); 511 512 /* actually zap the local rows */ 513 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 514 PLogObjectParent(A,istmp); 515 516 /* 517 Zero the required rows. If the "diagonal block" of the matrix 518 is square and the user wishes to set the diagonal we use seperate 519 code so that MatSetValues() is not called for each diagonal allocating 520 new memory, thus calling lots of mallocs and slowing things down. 
521 522 Contributed by: Mathew Knepley 523 */ 524 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 525 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 526 if (diag && (l->A->M == l->A->N)) { 527 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 528 } else if (diag) { 529 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 530 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 531 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 532 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 533 } 534 for ( i = 0; i < slen; i++ ) { 535 row = lrows[i] + rstart; 536 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 537 } 538 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 540 } else { 541 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 542 } 543 ierr = ISDestroy(istmp);CHKERRQ(ierr); 544 PetscFree(lrows); 545 546 /* wait on sends */ 547 if (nsends) { 548 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 549 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 550 PetscFree(send_status); 551 } 552 PetscFree(send_waits); PetscFree(svalues); 553 554 PetscFunctionReturn(0); 555 } 556 557 #undef __FUNC__ 558 #define __FUNC__ "MatMult_MPIAIJ" 559 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 560 { 561 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 562 int ierr,nt; 563 564 PetscFunctionBegin; 565 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 566 if (nt != a->n) { 567 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 568 } 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNC__ 577 #define __FUNC__ "MatMultAdd_MPIAIJ" 578 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 581 int ierr; 582 583 PetscFunctionBegin; 584 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 586 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNC__ 592 #define __FUNC__ "MatMultTrans_MPIAIJ" 593 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 594 { 595 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 596 int ierr; 597 598 PetscFunctionBegin; 599 /* do nondiagonal part */ 600 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 601 /* send it on its way */ 602 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 /* do local part */ 604 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 605 /* receive remote parts: note this assumes the values are not actually */ 606 /* inserted in yy until the next line, which is true for my implementation*/ 607 /* but is not perhaps always true. 
*/ 608 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNC__ 613 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 614 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 617 int ierr; 618 619 PetscFunctionBegin; 620 /* do nondiagonal part */ 621 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 622 /* send it on its way */ 623 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 /* do local part */ 625 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 626 /* receive remote parts: note this assumes the values are not actually */ 627 /* inserted in yy until the next line, which is true for my implementation*/ 628 /* but is not perhaps always true. */ 629 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 /* 634 This only works correctly for square matrices where the subblock A->A is the 635 diagonal block 636 */ 637 #undef __FUNC__ 638 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 639 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 640 { 641 int ierr; 642 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 643 644 PetscFunctionBegin; 645 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 646 if (a->rstart != a->cstart || a->rend != a->cend) { 647 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 648 } 649 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNC__ 654 #define __FUNC__ "MatScale_MPIAIJ" 655 int MatScale_MPIAIJ(Scalar *aa,Mat A) 656 { 657 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 658 int ierr; 659 660 PetscFunctionBegin; 661 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 662 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNC__ 667 #define __FUNC__ 
"MatDestroy_MPIAIJ" 668 int MatDestroy_MPIAIJ(Mat mat) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 671 int ierr; 672 673 PetscFunctionBegin; 674 if (--mat->refct > 0) PetscFunctionReturn(0); 675 676 if (mat->mapping) { 677 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 678 } 679 if (mat->bmapping) { 680 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 681 } 682 if (mat->rmap) { 683 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 684 } 685 if (mat->cmap) { 686 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 687 } 688 #if defined(PETSC_USE_LOG) 689 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 PetscFree(aij->rowners); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) TableDelete(aij->colmap); 697 #else 698 if (aij->colmap) PetscFree(aij->colmap); 699 #endif 700 if (aij->garray) PetscFree(aij->garray); 701 if (aij->lvec) VecDestroy(aij->lvec); 702 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 703 if (aij->rowvalues) PetscFree(aij->rowvalues); 704 PetscFree(aij); 705 PLogObjectDestroy(mat); 706 PetscHeaderDestroy(mat); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNC__ 711 #define __FUNC__ "MatView_MPIAIJ_Binary" 712 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 713 { 714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 715 int ierr; 716 717 PetscFunctionBegin; 718 if (aij->size == 1) { 719 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 720 } 721 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNC__ 726 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 730 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 731 int ierr, format,shift = 
C->indexshift,rank = aij->rank ; 732 int size = aij->size; 733 FILE *fd; 734 ViewerType vtype; 735 736 PetscFunctionBegin; 737 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 738 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 739 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 740 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 741 MatInfo info; 742 int flg; 743 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 744 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 745 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 746 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 747 PetscSequentialPhaseBegin(mat->comm,1); 748 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 749 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 750 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 751 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 752 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 753 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 754 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 755 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 756 fflush(fd); 757 PetscSequentialPhaseEnd(mat->comm,1); 758 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 761 PetscFunctionReturn(0); 762 } 763 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 764 Draw draw; 765 PetscTruth isnull; 766 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 767 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 768 } 769 770 if (size == 1) { 771 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 772 } else { 773 /* assemble the entire matrix onto first processor. 
*/ 774 Mat A; 775 Mat_SeqAIJ *Aloc; 776 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 777 Scalar *a; 778 779 if (!rank) { 780 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 781 } else { 782 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 783 } 784 PLogObjectParent(mat,A); 785 786 /* copy over the A part */ 787 Aloc = (Mat_SeqAIJ*) aij->A->data; 788 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 789 row = aij->rstart; 790 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 791 for ( i=0; i<m; i++ ) { 792 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 793 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 794 } 795 aj = Aloc->j; 796 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 797 798 /* copy over the B part */ 799 Aloc = (Mat_SeqAIJ*) aij->B->data; 800 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 801 row = aij->rstart; 802 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 803 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 804 for ( i=0; i<m; i++ ) { 805 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 806 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 807 } 808 PetscFree(ct); 809 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 810 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 811 /* 812 Everyone has to call to draw the matrix since the graphics waits are 813 synchronized across all processors that share the Draw object 814 */ 815 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 816 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 817 } 818 ierr = MatDestroy(A);CHKERRQ(ierr); 819 } 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNC__ 824 #define __FUNC__ "MatView_MPIAIJ" 825 int MatView_MPIAIJ(Mat mat,Viewer viewer) 826 { 827 int ierr; 828 ViewerType vtype; 829 830 
PetscFunctionBegin; 831 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 832 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 833 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 834 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 835 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 836 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 837 } else { 838 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 839 } 840 PetscFunctionReturn(0); 841 } 842 843 /* 844 This has to provide several versions. 845 846 2) a) use only local smoothing updating outer values only once. 847 b) local smoothing updating outer values each inner iteration 848 3) color updating out values betwen colors. 849 */ 850 #undef __FUNC__ 851 #define __FUNC__ "MatRelax_MPIAIJ" 852 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 853 double fshift,int its,Vec xx) 854 { 855 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 856 Mat AA = mat->A, BB = mat->B; 857 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 858 Scalar *b,*x,*xs,*ls,d,*v,sum; 859 int ierr,*idx, *diag; 860 int n = mat->n, m = mat->m, i,shift = A->indexshift; 861 862 PetscFunctionBegin; 863 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 864 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 865 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 866 xs = x + shift; /* shift by one for index start of 1 */ 867 ls = ls + shift; 868 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 869 diag = A->diag; 870 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 871 if (flag & SOR_ZERO_INITIAL_GUESS) { 872 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 873 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 874 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 875 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 ierr = 
VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 879 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 880 while (its--) { 881 /* go down through the rows */ 882 for ( i=0; i<m; i++ ) { 883 n = A->i[i+1] - A->i[i]; 884 PLogFlops(4*n+3); 885 idx = A->j + A->i[i] + shift; 886 v = A->a + A->i[i] + shift; 887 sum = b[i]; 888 SPARSEDENSEMDOT(sum,xs,v,idx,n); 889 d = fshift + A->a[diag[i]+shift]; 890 n = B->i[i+1] - B->i[i]; 891 idx = B->j + B->i[i] + shift; 892 v = B->a + B->i[i] + shift; 893 SPARSEDENSEMDOT(sum,ls,v,idx,n); 894 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 895 } 896 /* come up through the rows */ 897 for ( i=m-1; i>-1; i-- ) { 898 n = A->i[i+1] - A->i[i]; 899 PLogFlops(4*n+3) 900 idx = A->j + A->i[i] + shift; 901 v = A->a + A->i[i] + shift; 902 sum = b[i]; 903 SPARSEDENSEMDOT(sum,xs,v,idx,n); 904 d = fshift + A->a[diag[i]+shift]; 905 n = B->i[i+1] - B->i[i]; 906 idx = B->j + B->i[i] + shift; 907 v = B->a + B->i[i] + shift; 908 SPARSEDENSEMDOT(sum,ls,v,idx,n); 909 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 910 } 911 } 912 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 913 if (flag & SOR_ZERO_INITIAL_GUESS) { 914 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 915 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 916 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 917 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 921 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 922 while (its--) { 923 for ( i=0; i<m; i++ ) { 924 n = A->i[i+1] - A->i[i]; 925 PLogFlops(4*n+3); 926 idx = A->j + A->i[i] + shift; 927 v = A->a + A->i[i] + shift; 928 sum = b[i]; 929 SPARSEDENSEMDOT(sum,xs,v,idx,n); 930 d = fshift + A->a[diag[i]+shift]; 931 n = B->i[i+1] - B->i[i]; 932 idx = B->j + B->i[i] + shift; 933 v = B->a + B->i[i] + shift; 934 SPARSEDENSEMDOT(sum,ls,v,idx,n); 935 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 936 } 937 } 938 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 939 if (flag & SOR_ZERO_INITIAL_GUESS) { 940 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 941 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 942 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 943 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 947 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 948 while (its--) { 949 for ( i=m-1; i>-1; i-- ) { 950 n = A->i[i+1] - A->i[i]; 951 PLogFlops(4*n+3); 952 idx = A->j + A->i[i] + shift; 953 v = A->a + A->i[i] + shift; 954 sum = b[i]; 955 SPARSEDENSEMDOT(sum,xs,v,idx,n); 956 d = fshift + A->a[diag[i]+shift]; 957 n = B->i[i+1] - B->i[i]; 958 idx = B->j + B->i[i] + shift; 959 v = B->a + B->i[i] + shift; 960 SPARSEDENSEMDOT(sum,ls,v,idx,n); 961 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 962 } 963 } 964 } else { 965 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 966 } 967 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 968 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 969 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 #undef __FUNC__ 974 #define __FUNC__ "MatGetInfo_MPIAIJ" 975 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 976 { 977 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 978 Mat A = mat->A, B = mat->B; 979 int ierr; 980 double isend[5], irecv[5]; 981 982 PetscFunctionBegin; 983 info->block_size = 1.0; 984 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 985 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 986 isend[3] = info->memory; isend[4] = info->mallocs; 987 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 988 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 989 isend[3] += info->memory; isend[4] += info->mallocs; 990 if (flag == MAT_LOCAL) { 991 info->nz_used = isend[0]; 992 info->nz_allocated = isend[1]; 993 info->nz_unneeded = isend[2]; 994 info->memory = isend[3]; 995 info->mallocs = isend[4]; 996 } else if (flag == MAT_GLOBAL_MAX) { 997 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 998 info->nz_used = irecv[0]; 999 info->nz_allocated = irecv[1]; 1000 info->nz_unneeded = irecv[2]; 1001 info->memory = irecv[3]; 1002 info->mallocs = irecv[4]; 1003 } else if (flag == MAT_GLOBAL_SUM) { 1004 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1005 info->nz_used = irecv[0]; 1006 info->nz_allocated = irecv[1]; 1007 info->nz_unneeded = irecv[2]; 1008 info->memory = irecv[3]; 1009 info->mallocs = irecv[4]; 1010 } 1011 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1012 info->fill_ratio_needed = 0; 1013 info->factor_mallocs = 0; 1014 info->rows_global = 
(double)mat->M; 1015 info->columns_global = (double)mat->N; 1016 info->rows_local = (double)mat->m; 1017 info->columns_local = (double)mat->N; 1018 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNC__ 1023 #define __FUNC__ "MatSetOption_MPIAIJ" 1024 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1027 int ierr; 1028 1029 PetscFunctionBegin; 1030 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1031 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1032 op == MAT_COLUMNS_UNSORTED || 1033 op == MAT_COLUMNS_SORTED || 1034 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1035 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1036 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1037 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1038 } else if (op == MAT_ROW_ORIENTED) { 1039 a->roworiented = 1; 1040 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1041 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1042 } else if (op == MAT_ROWS_SORTED || 1043 op == MAT_ROWS_UNSORTED || 1044 op == MAT_SYMMETRIC || 1045 op == MAT_STRUCTURALLY_SYMMETRIC || 1046 op == MAT_YES_NEW_DIAGONALS) 1047 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1048 else if (op == MAT_COLUMN_ORIENTED) { 1049 a->roworiented = 0; 1050 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1051 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1052 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1053 a->donotstash = 1; 1054 } else if (op == MAT_NO_NEW_DIAGONALS){ 1055 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1056 } else { 1057 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1058 } 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNC__ 1063 #define __FUNC__ "MatGetSize_MPIAIJ" 1064 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1065 { 1066 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1067 1068 PetscFunctionBegin; 1069 if (m) *m = mat->M; 1070 if (n) *n = mat->N; 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNC__ 1075 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1076 int MatGetLocalSize_MPIAIJ(Mat 
matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  /* local dimensions of this process's slice */
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
/* MatGetOwnershipRange_MPIAIJ - Returns [rstart, rend), the global row
   range owned by this process. */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row, merging the diagonal
   (A) and off-diagonal (B) blocks into a single array sorted by global
   column number.  Values/indices point into the per-matrix scratch arrays
   rowvalues/rowindices, so only one row may be active at a time
   (guarded by getrowactive; released by MatRestoreRow).
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
        (one allocation holds both the Scalar values and, after them, the
        int indices; see rowindices below)
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* request only the pieces the caller asked for from the two blocks */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column < cstart,
         i.e. those that precede the whole diagonal block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed above while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* diagonal-block columns are local; add cstart to globalize */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow;
   only clears the getrowactive guard (scratch arrays are reused). */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Frobenius, one (max column sum) and infinity (max row
   sum) norms.  Uniprocessor case is delegated to the sequential block;
   otherwise local contributions are combined with MPI_Allreduce.
   NORM_1 is non-scalable: it allocates two arrays of global length N.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
      ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      /* accumulate |a_ij| per global column: A columns offset by cstart,
         B columns mapped through garray */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Builds the transpose by feeding each local row of
   A and B (with globalized column indices) into MatSetValues of a new
   n x m parallel matrix as a column.  With matout == PETSC_NULL the
   "in-place" variant destroys the old data structures and copies the new
   header over the old one, keeping only the original ops tables.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* note swapped local and global dimensions: B is the transpose shape */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily globalize the column indices in place ... */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* ... and restore them afterwards */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  /* B's compressed column indices are globalized through garray into a
     scratch array (ct keeps the base pointer for the free below) */
  ct   = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    ierr = MatDestroy(a->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
        This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from C.  (NOTE(review): "C" here presumably means the new
      matrix B -- confirm)
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr).  The right
   scaling of the off-diagonal block needs off-processor entries of rr, so
   the scatter of rr into lvec is overlapped with the left scaling and the
   diagonal-block scaling.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ help text, on rank 0 only. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ has no blocking; block size is always 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Two MPIAIJ matrices are equal iff on every process the
   diagonal blocks agree AND the off-diagonal blocks agree; the per-process
   results are combined with a logical-AND reduction.
*/
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B.  Only when both are MPIAIJ with the
   same nonzero pattern can the two blocks be copied directly; otherwise
   the generic element-by-element MatCopy_Basic is used.
*/
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);

/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ type; entries are POSITIONAL -- a 0 means
   the operation is not supported.  Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves the numerical values of both blocks so
   they can later be restored with MatRetrieveValues. */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the values saved by MatStoreValues. */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
(or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
              (defaults to using SeqAIJ format on one processor)


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on one process fall back to a SeqAIJ matrix unless the parallel
     format was explicitly requested via -mat_mpiaij / -mat_mpi */
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* rowners and cowners share one allocation; each holds size+2 prefix sums */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* the two sequential blocks: A is m x n (diagonal part),
     B is m x N (off-diagonal part, columns compressed later) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIAIJ",
                                    (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIAIJ",
                                    (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIAIJ",
                                    (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Duplicates an assembled MPIAIJ matrix, copying the
   partitioning tables, column map, garray, scatter context and (depending
   on cpvalues) the numerical values of both sequential blocks.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart      = oldmat->rstart;
  a->rend        = oldmat->rend;
  a->cstart      = oldmat->cstart;
  a->cend        = oldmat->cend;
  a->size        = oldmat->size;
  a->rank        = oldmat->rank;
  a->donotstash  = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
a->rowindices = 0; 1866 a->rowvalues = 0; 1867 a->getrowactive = PETSC_FALSE; 1868 1869 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1870 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1871 a->cowners = a->rowners + a->size + 2; 1872 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1873 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1874 if (oldmat->colmap) { 1875 #if defined (PETSC_USE_CTABLE) 1876 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1877 #else 1878 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1879 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1880 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1881 #endif 1882 } else a->colmap = 0; 1883 if (oldmat->garray) { 1884 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1885 a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1886 PLogObjectMemory(mat,len*sizeof(int)); 1887 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1888 } else a->garray = 0; 1889 1890 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1891 PLogObjectParent(mat,a->lvec); 1892 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1893 PLogObjectParent(mat,a->Mvctx); 1894 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1895 PLogObjectParent(mat,a->A); 1896 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1897 PLogObjectParent(mat,a->B); 1898 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1899 if (flg) { 1900 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1901 } 1902 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1903 *newmat = mat; 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #include "sys.h" 1908 1909 #undef __FUNC__ 1910 #define __FUNC__ "MatLoad_MPIAIJ" 1911 int MatLoad_MPIAIJ(Viewer viewer,MatType 
type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i,nz,ierr,j,rstart,rend,fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* process 0 reads and validates the binary header: cookie, M, N, header[3];
     header[3] < 0 marks a special on-disk format this loader cannot handle */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: block distribution, the first M%size
     processes receive one extra row */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];   /* prefix sum -> cumulative row offsets */
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share
     one allocation of 2*(rend-rstart) ints */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array: rowners[i] is exactly the
       offset of process i's first row in rowlengths */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    /* non-root ranks: send buffer arguments are ignored by MPI_Scatterv */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off; the single
       cols buffer of size maxnz is reused for every destination */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices and sanity-check its length */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square; inclusive MPI_Scan
     of the local column counts yields this process's cend directly.
     NOTE(review): this n/size split should match what MatCreateMPIAIJ's
     PetscSplitOwnership produces -- confirm. */
  if (N != M) {
    n = N/size + ((N % size) > rank);
    ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries
     (columns outside [cstart,cend)) */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily turn ourlens into diagonal-only counts
     for exact preallocation, then restore the full row lengths */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);   /* file stores sorted columns */
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix one local row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values and sanity-check its length */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz)
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix one local row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M,Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Step 1: extract the requested rows/columns as a local sequential matrix,
     either reusing the one stashed on *newmat by a previous call or
     creating a fresh one */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);   /* keep the matrix, free only the pointer array */
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns; an inclusive MPI_Scan
       of the local column counts yields this process's rend directly */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* the last process sees the total; it must equal the global column count */
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths (dlens and olens share one allocation) */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* Step 2: copy the local sequential submatrix into the parallel matrix,
     walking its CSR arrays row by row */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the local submatrix on the result for a future MAT_REUSE_MATRIX
     call; dereference so the result owns it */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}