1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.296 1999/06/10 16:08:15 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for ( i=0; i<n; i++ ){ 33 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for ( _i=0; _i<nrow; _i++ ) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 78 new_j = (int *) (new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 83 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if 
(!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = 1; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for ( _i=0; _i<nrow; _i++ ) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 152 new_j = (int *) (new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 157 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = 1; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for ( ii=N; ii>=_i; ii-- ) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int 
*in,Scalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 198 Scalar value; 199 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 200 int cstart = aij->cstart, cend = aij->cend,row,col; 201 int roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 206 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 207 Scalar *aa = a->a; 208 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 211 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 212 Scalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 Scalar *ap; 217 218 PetscFunctionBegin; 219 for ( i=0; i<m; i++ ) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for ( j=0; j<n; j++ ) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ *) B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatGetValues_MPIAIJ" 278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 279 { 280 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 281 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 282 int cstart = aij->cstart, cend = aij->cend,row,col; 283 284 PetscFunctionBegin; 285 for ( i=0; i<m; i++ ) { 286 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 287 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 288 if (idxm[i] >= rstart && idxm[i] < rend) { 289 row = idxm[i] - rstart; 290 for ( j=0; j<n; j++ ) { 291 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 292 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 293 if (idxn[j] >= cstart && idxn[j] < cend){ 294 col = idxn[j] - cstart; 295 ierr = 
MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 296 } else { 297 if (!aij->colmap) { 298 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 299 } 300 #if defined (PETSC_USE_CTABLE) 301 ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 302 col --; 303 #else 304 col = aij->colmap[idxn[j]] - 1; 305 #endif 306 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 307 else { 308 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 309 } 310 } 311 } 312 } else { 313 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 322 { 323 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 324 int ierr,nstash,reallocs; 325 InsertMode addv; 326 327 PetscFunctionBegin; 328 if (aij->donotstash) { 329 PetscFunctionReturn(0); 330 } 331 332 /* make sure all processors are either in INSERTMODE or ADDMODE */ 333 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 334 if (addv == (ADD_VALUES|INSERT_VALUES)) { 335 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 336 } 337 mat->insertmode = addv; /* in case this processor had no cache */ 338 339 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 340 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 341 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 342 PetscFunctionReturn(0); 343 } 344 345 346 #undef __FUNC__ 347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 349 { 350 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 351 int i,j,rstart,ncols,n,ierr,flg; 352 int *row,*col,other_disassembled; 353 Scalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for ( i=0; i<n; ) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
*/ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 } 390 391 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 392 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 395 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 396 397 if (aij->rowvalues) { 398 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 399 aij->rowvalues = 0; 400 } 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNC__ 418 #define __FUNC__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 422 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 423 int *procs,*nprocs,j,found,idx,nsends,*work,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 432 PetscFunctionBegin; 433 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each processor */ 437 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 procs = nprocs + size; 440 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 441 for ( i=0; i<N; i++ ) { 442 idx = rows[i]; 443 found = 0; 444 for ( j=0; j<size; j++ ) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 450 } 451 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 452 453 /* inform other processors of number of messages and max length*/ 454 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 455 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 456 nrecvs = work[rank]; 457 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 458 nmax = work[rank]; 459 ierr = PetscFree(work);CHKERRQ(ierr); 460 461 /* post receives: */ 462 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 463 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 464 for ( i=0; i<nrecvs; i++ ) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 473 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 474 starts = 
(int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 475 starts[0] = 0; 476 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 477 for ( i=0; i<N; i++ ) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ISRestoreIndices(is,&rows); 481 482 starts[0] = 0; 483 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 484 count = 0; 485 for ( i=0; i<size; i++ ) { 486 if (procs[i]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 511 count = 0; 512 for ( i=0; i<nrecvs; i++ ) { 513 values = rvalues + i*nmax; 514 for ( j=0; j<lens[i]; j++ ) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for ( i = 0; i < slen; i++ ) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNC__ 570 #define __FUNC__ "MatMult_MPIAIJ" 571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 574 int ierr,nt; 575 576 PetscFunctionBegin; 577 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 578 if (nt != a->n) { 579 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 580 } 581 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 582 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 583 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNC__ 589 #define __FUNC__ "MatMultAdd_MPIAIJ" 590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 591 { 592 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 593 int ierr; 594 595 PetscFunctionBegin; 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNC__ 604 #define __FUNC__ "MatMultTrans_MPIAIJ" 605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 /* do nondiagonal part */ 612 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 613 /* send it on its way */ 614 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 /* do local part */ 616 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 617 /* receive remote parts: note this assumes the values are not actually */ 618 /* inserted in yy until the next line, which is true for my implementation*/ 619 /* but is not perhaps always true. 
*/ 620 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNC__ 625 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 627 { 628 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 629 int ierr; 630 631 PetscFunctionBegin; 632 /* do nondiagonal part */ 633 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 634 /* send it on its way */ 635 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 /* do local part */ 637 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 638 /* receive remote parts: note this assumes the values are not actually */ 639 /* inserted in yy until the next line, which is true for my implementation*/ 640 /* but is not perhaps always true. */ 641 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 642 PetscFunctionReturn(0); 643 } 644 645 /* 646 This only works correctly for square matrices where the subblock A->A is the 647 diagonal block 648 */ 649 #undef __FUNC__ 650 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 652 { 653 int ierr; 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 656 PetscFunctionBegin; 657 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 658 if (a->rstart != a->cstart || a->rend != a->cend) { 659 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 660 } 661 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatScale_MPIAIJ" 667 int MatScale_MPIAIJ(Scalar *aa,Mat A) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 PetscFunctionBegin; 673 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 674 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNC__ 679 #define __FUNC__ "MatDestroy_MPIAIJ" 680 int MatDestroy_MPIAIJ(Mat mat) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 683 int ierr; 684 685 PetscFunctionBegin; 686 if (--mat->refct > 0) PetscFunctionReturn(0); 687 688 if (mat->mapping) { 689 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 690 } 691 if (mat->bmapping) { 692 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 693 } 694 if (mat->rmap) { 695 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 696 } 697 if (mat->cmap) { 698 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 699 } 700 #if defined(PETSC_USE_LOG) 701 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 702 #endif 703 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 704 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 705 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 706 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 707 #if defined (PETSC_USE_CTABLE) 708 if (aij->colmap) TableDelete(aij->colmap); 709 #else 710 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 711 #endif 712 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 713 if (aij->lvec) VecDestroy(aij->lvec); 714 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 715 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 716 ierr = PetscFree(aij);CHKERRQ(ierr); 717 PLogObjectDestroy(mat); 718 PetscHeaderDestroy(mat); 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNC__ 723 #define __FUNC__ "MatView_MPIAIJ_Binary" 724 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 725 { 726 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ *) mat->data; 727 int ierr; 728 729 PetscFunctionBegin; 730 if (aij->size == 1) { 731 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 732 } 733 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNC__ 738 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 739 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 740 { 741 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 742 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 743 int ierr, format,shift = C->indexshift,rank = aij->rank ; 744 int size = aij->size; 745 FILE *fd; 746 ViewerType vtype; 747 748 PetscFunctionBegin; 749 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 750 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 751 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 752 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 753 MatInfo info; 754 int flg; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 757 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 758 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 759 PetscSequentialPhaseBegin(mat->comm,1); 760 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 761 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 762 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 763 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 764 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 765 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 766 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 767 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 768 fflush(fd); 769 PetscSequentialPhaseEnd(mat->comm,1); 770 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 771 PetscFunctionReturn(0); 772 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 773 PetscFunctionReturn(0); 774 } 775 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 776 Draw draw; 777 PetscTruth isnull; 778 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 779 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 780 } 781 782 if (size == 1) { 783 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 784 } else { 785 /* assemble the entire matrix onto first processor. 
*/
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      ierr = PetscFree(ct);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
      }
      ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  /*
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  */
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
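
    A sketch of the row update performed by the local sweeps below, assuming
    SPARSEDENSEMDOT(sum,x,v,idx,n) subtracts the sparse row dot product from
    sum: with d = fshift + a(i,i), each row is updated as

        x(i) = (1 - omega)*x(i) + omega*( b(i) - sum_{j != i} a(i,j)*x(j) )/d

    where contributions from off-processor columns (the B part) use the ghost
    values that the VecScatter has gathered into lvec.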
863 */ 864 #undef __FUNC__ 865 #define __FUNC__ "MatRelax_MPIAIJ" 866 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 867 double fshift,int its,Vec xx) 868 { 869 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 870 Mat AA = mat->A, BB = mat->B; 871 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 872 Scalar *b,*x,*xs,*ls,d,*v,sum; 873 int ierr,*idx, *diag; 874 int n = mat->n, m = mat->m, i,shift = A->indexshift; 875 876 PetscFunctionBegin; 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 879 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 880 xs = x + shift; /* shift by one for index start of 1 */ 881 ls = ls + shift; 882 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 883 diag = A->diag; 884 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 885 if (flag & SOR_ZERO_INITIAL_GUESS) { 886 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 889 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 893 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 894 while (its--) { 895 /* go down through the rows */ 896 for ( i=0; i<m; i++ ) { 897 n = A->i[i+1] - A->i[i]; 898 PLogFlops(4*n+3); 899 idx = A->j + A->i[i] + shift; 900 v = A->a + A->i[i] + shift; 901 sum = b[i]; 902 SPARSEDENSEMDOT(sum,xs,v,idx,n); 903 d = fshift + A->a[diag[i]+shift]; 904 n = B->i[i+1] - B->i[i]; 905 idx = B->j + B->i[i] + shift; 906 v = B->a + B->i[i] + shift; 907 SPARSEDENSEMDOT(sum,ls,v,idx,n); 908 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 909 } 910 /* come up through the rows */ 911 for ( i=m-1; i>-1; i-- ) { 912 n = A->i[i+1] - A->i[i]; 913 PLogFlops(4*n+3) 914 idx = A->j + A->i[i] + shift; 915 v = A->a + A->i[i] + shift; 916 sum = b[i]; 917 SPARSEDENSEMDOT(sum,xs,v,idx,n); 918 d = fshift + A->a[diag[i]+shift]; 919 n = B->i[i+1] - B->i[i]; 920 idx = B->j + B->i[i] + shift; 921 v = B->a + B->i[i] + shift; 922 SPARSEDENSEMDOT(sum,ls,v,idx,n); 923 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 924 } 925 } 926 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 927 if (flag & SOR_ZERO_INITIAL_GUESS) { 928 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 929 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 930 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 931 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 935 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 936 while (its--) { 937 for ( i=0; i<m; i++ ) { 938 n = A->i[i+1] - A->i[i]; 939 PLogFlops(4*n+3); 940 idx = A->j + A->i[i] + shift; 941 v = A->a + A->i[i] + shift; 942 sum = b[i]; 943 SPARSEDENSEMDOT(sum,xs,v,idx,n); 944 d = fshift + A->a[diag[i]+shift]; 945 n = B->i[i+1] - B->i[i]; 946 idx = B->j + B->i[i] + shift; 947 v = B->a + B->i[i] + shift; 948 SPARSEDENSEMDOT(sum,ls,v,idx,n); 949 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 950 } 951 } 952 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 953 if (flag & SOR_ZERO_INITIAL_GUESS) { 954 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 955 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 956 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 957 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 961 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 962 while (its--) { 963 for ( i=m-1; i>-1; i-- ) { 964 n = A->i[i+1] - A->i[i]; 965 PLogFlops(4*n+3); 966 idx = A->j + A->i[i] + shift; 967 v = A->a + A->i[i] + shift; 968 sum = b[i]; 969 SPARSEDENSEMDOT(sum,xs,v,idx,n); 970 d = fshift + A->a[diag[i]+shift]; 971 n = B->i[i+1] - B->i[i]; 972 idx = B->j + B->i[i] + shift; 973 v = B->a + B->i[i] + shift; 974 SPARSEDENSEMDOT(sum,ls,v,idx,n); 975 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 976 } 977 } 978 } else { 979 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 980 } 981 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 982 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 983 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNC__ 988 #define __FUNC__ "MatGetInfo_MPIAIJ" 989 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 990 { 991 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 992 Mat A = mat->A, B = mat->B; 993 int ierr; 994 double isend[5], irecv[5]; 995 996 PetscFunctionBegin; 997 info->block_size = 1.0; 998 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 999 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1000 isend[3] = info->memory; isend[4] = info->mallocs; 1001 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1002 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1003 isend[3] += info->memory; isend[4] += info->mallocs; 1004 if (flag == MAT_LOCAL) { 1005 info->nz_used = isend[0]; 1006 info->nz_allocated = isend[1]; 1007 info->nz_unneeded = isend[2]; 1008 info->memory = isend[3]; 1009 info->mallocs = isend[4]; 1010 } else if (flag == MAT_GLOBAL_MAX) { 1011 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1012 info->nz_used = irecv[0]; 1013 info->nz_allocated = irecv[1]; 1014 info->nz_unneeded = irecv[2]; 1015 info->memory = irecv[3]; 1016 info->mallocs = irecv[4]; 1017 } else if (flag == MAT_GLOBAL_SUM) { 1018 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1019 info->nz_used = irecv[0]; 1020 info->nz_allocated = irecv[1]; 1021 info->nz_unneeded = irecv[2]; 1022 info->memory = irecv[3]; 1023 info->mallocs = irecv[4]; 1024 } 1025 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1026 info->fill_ratio_needed = 0; 1027 info->factor_mallocs = 0; 1028 info->rows_global = (double)mat->M; 1029 info->columns_global = (double)mat->N; 1030 info->rows_local = (double)mat->m; 1031 info->columns_local = (double)mat->N; 1032 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #undef __FUNC__ 1037 #define __FUNC__ "MatSetOption_MPIAIJ" 1038 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1039 { 1040 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1041 int ierr; 1042 1043 PetscFunctionBegin; 1044 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1045 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1046 op == 
MAT_COLUMNS_UNSORTED || 1047 op == MAT_COLUMNS_SORTED || 1048 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1049 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1050 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1051 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1052 } else if (op == MAT_ROW_ORIENTED) { 1053 a->roworiented = 1; 1054 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1055 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1056 } else if (op == MAT_ROWS_SORTED || 1057 op == MAT_ROWS_UNSORTED || 1058 op == MAT_SYMMETRIC || 1059 op == MAT_STRUCTURALLY_SYMMETRIC || 1060 op == MAT_YES_NEW_DIAGONALS) 1061 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1062 else if (op == MAT_COLUMN_ORIENTED) { 1063 a->roworiented = 0; 1064 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1065 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1066 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1067 a->donotstash = 1; 1068 } else if (op == MAT_NO_NEW_DIAGONALS){ 1069 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1070 } else { 1071 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNC__ 1077 #define __FUNC__ "MatGetSize_MPIAIJ" 1078 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1079 { 1080 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1081 1082 PetscFunctionBegin; 1083 if (m) *m = mat->M; 1084 if (n) *n = mat->N; 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNC__ 1089 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1090 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1091 { 1092 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1093 1094 PetscFunctionBegin; 1095 if (m) *m = mat->m; 1096 if (n) *n = mat->n; 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNC__ 1101 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1102 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1103 { 1104 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1105 1106 PetscFunctionBegin; 1107 *m = mat->rstart; *n = mat->rend; 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNC__ 1112 #define __FUNC__ "MatGetRow_MPIAIJ" 1113 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1114 { 1115 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1116 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1117 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1118 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1119 int *cmap, *idx_p; 1120 1121 PetscFunctionBegin; 1122 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1123 mat->getrowactive = PETSC_TRUE; 1124 1125 if (!mat->rowvalues && (idx || v)) { 1126 /* 1127 allocate enough space to hold information from the longest row. 
1128 */ 1129 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1130 int max = 1,m = mat->m,tmp; 1131 for ( i=0; i<m; i++ ) { 1132 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1133 if (max < tmp) { max = tmp; } 1134 } 1135 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1136 mat->rowindices = (int *) (mat->rowvalues + max); 1137 } 1138 1139 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1140 lrow = row - rstart; 1141 1142 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1143 if (!v) {pvA = 0; pvB = 0;} 1144 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1145 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1146 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1147 nztot = nzA + nzB; 1148 1149 cmap = mat->garray; 1150 if (v || idx) { 1151 if (nztot) { 1152 /* Sort by increasing column numbers, assuming A and B already sorted */ 1153 int imark = -1; 1154 if (v) { 1155 *v = v_p = mat->rowvalues; 1156 for ( i=0; i<nzB; i++ ) { 1157 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1158 else break; 1159 } 1160 imark = i; 1161 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1162 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1163 } 1164 if (idx) { 1165 *idx = idx_p = mat->rowindices; 1166 if (imark > -1) { 1167 for ( i=0; i<imark; i++ ) { 1168 idx_p[i] = cmap[cworkB[i]]; 1169 } 1170 } else { 1171 for ( i=0; i<nzB; i++ ) { 1172 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1173 else break; 1174 } 1175 imark = i; 1176 } 1177 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1178 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1179 } 1180 } else { 1181 if (idx) *idx = 0; 1182 if (v) *v = 0; 1183 } 1184 } 1185 *nz = nztot; 1186 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1187 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNC__ 1192 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1193 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1194 { 1195 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1196 1197 PetscFunctionBegin; 1198 if (aij->getrowactive == PETSC_FALSE) { 1199 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1200 } 1201 aij->getrowactive = PETSC_FALSE; 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatNorm_MPIAIJ" 1207 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1208 { 1209 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1210 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1211 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1212 double sum = 0.0; 1213 Scalar *v; 1214 1215 PetscFunctionBegin; 1216 if (aij->size == 1) { 1217 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1218 } else { 1219 if (type == NORM_FROBENIUS) { 1220 v = amat->a; 1221 for (i=0; i<amat->nz; i++ ) { 1222 #if defined(PETSC_USE_COMPLEX) 1223 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1224 #else 1225 sum += (*v)*(*v); v++; 1226 #endif 1227 } 1228 v = bmat->a; 1229 for (i=0; i<bmat->nz; i++ ) { 1230 #if defined(PETSC_USE_COMPLEX) 1231 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1232 #else 1233 sum += (*v)*(*v); v++; 1234 #endif 1235 } 1236 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1237 *norm = sqrt(*norm); 1238 } else if (type == NORM_1) { /* max 
column norm */ 1239 double *tmp, *tmp2; 1240 int *jj, *garray = aij->garray; 1241 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1242 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1243 ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 1244 *norm = 0.0; 1245 v = amat->a; jj = amat->j; 1246 for ( j=0; j<amat->nz; j++ ) { 1247 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1248 } 1249 v = bmat->a; jj = bmat->j; 1250 for ( j=0; j<bmat->nz; j++ ) { 1251 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1252 } 1253 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1254 for ( j=0; j<aij->N; j++ ) { 1255 if (tmp2[j] > *norm) *norm = tmp2[j]; 1256 } 1257 ierr = PetscFree(tmp);CHKERRQ(ierr); 1258 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1259 } else if (type == NORM_INFINITY) { /* max row norm */ 1260 double ntemp = 0.0; 1261 for ( j=0; j<amat->m; j++ ) { 1262 v = amat->a + amat->i[j] + shift; 1263 sum = 0.0; 1264 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1265 sum += PetscAbsScalar(*v); v++; 1266 } 1267 v = bmat->a + bmat->i[j] + shift; 1268 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1269 sum += PetscAbsScalar(*v); v++; 1270 } 1271 if (sum > ntemp) ntemp = sum; 1272 } 1273 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1274 } else { 1275 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1276 } 1277 } 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNC__ 1282 #define __FUNC__ "MatTranspose_MPIAIJ" 1283 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1284 { 1285 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1286 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1287 int ierr,shift = Aloc->indexshift; 1288 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1289 Mat B; 1290 Scalar *array; 1291 1292 PetscFunctionBegin; 1293 if (matout == PETSC_NULL && M != N) { 1294 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1295 } 1296 1297 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1298 1299 /* copy over the A part */ 1300 Aloc = (Mat_SeqAIJ*) a->A->data; 1301 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1302 row = a->rstart; 1303 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1304 for ( i=0; i<m; i++ ) { 1305 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1306 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1307 } 1308 aj = Aloc->j; 1309 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1310 1311 /* copy over the B part */ 1312 Aloc = (Mat_SeqAIJ*) a->B->data; 1313 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1314 row = a->rstart; 1315 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1316 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1317 for ( i=0; i<m; i++ ) { 1318 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1319 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1320 } 1321 ierr = PetscFree(ct);CHKERRQ(ierr); 1322 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1323 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1324 if (matout != PETSC_NULL) { 1325 *matout = B; 1326 } else { 1327 PetscOps *Abops; 1328 struct _MatOps *Aops; 1329 1330 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1331 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1332 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1333 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1334 #if defined (PETSC_USE_CTABLE) 1335 if (a->colmap) TableDelete(a->colmap); 1336 #else 1337 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1338 #endif 1339 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1340 if (a->lvec) VecDestroy(a->lvec); 1341 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1342 ierr = PetscFree(a);CHKERRQ(ierr); 1343 1344 /* 1345 This is horrible, horrible code. We need to keep the 1346 A pointers for the bops and ops but copy everything 1347 else from C. 1348 */ 1349 Abops = A->bops; 1350 Aops = A->ops; 1351 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1352 A->bops = Abops; 1353 A->ops = Aops; 1354 PetscHeaderDestroy(B); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNC__ 1360 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1361 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1362 { 1363 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1364 Mat a = aij->A, b = aij->B; 1365 int ierr,s1,s2,s3; 1366 1367 PetscFunctionBegin; 1368 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1369 if (rr) { 1370 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1371 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1372 /* Overlap communication with computation. */ 1373 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1374 } 1375 if (ll) { 1376 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1377 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1378 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1379 } 1380 /* scale the diagonal block */ 1381 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1382 1383 if (rr) { 1384 /* Do a scatter end and then right scale the off-diagonal block */ 1385 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1386 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1387 } 1388 1389 PetscFunctionReturn(0); 1390 } 1391 1392 1393 #undef __FUNC__ 1394 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1395 int MatPrintHelp_MPIAIJ(Mat A) 1396 { 1397 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1398 int ierr; 1399 1400 PetscFunctionBegin; 1401 if (!a->rank) { 1402 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1403 } 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNC__ 1408 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1409 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1410 { 1411 PetscFunctionBegin; 1412 *bs = 1; 1413 PetscFunctionReturn(0); 1414 } 1415 #undef __FUNC__ 1416 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1417 int MatSetUnfactored_MPIAIJ(Mat A) 1418 { 1419 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1420 int ierr; 1421 1422 PetscFunctionBegin; 1423 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNC__ 1428 #define __FUNC__ "MatEqual_MPIAIJ" 1429 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1430 { 1431 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1432 Mat a, b, c, d; 1433 PetscTruth flg; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1438 a = matA->A; b = matA->B; 1439 c = matB->A; d = matB->B; 1440 1441 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1442 if (flg == PETSC_TRUE) { 1443 ierr = MatEqual(b, d, 
&flg);CHKERRQ(ierr); 1444 } 1445 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNC__ 1450 #define __FUNC__ "MatCopy_MPIAIJ" 1451 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1452 { 1453 int ierr; 1454 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1455 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1456 1457 PetscFunctionBegin; 1458 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1459 /* because of the column compression in the off-processor part of the matrix a->B, 1460 the number of columns in a->B and b->B may be different, hence we cannot call 1461 the MatCopy() directly on the two parts. If need be, we can provide a more 1462 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1463 then copying the submatrices */ 1464 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1465 } else { 1466 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1467 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1473 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1474 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1475 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1476 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1477 1478 /* -------------------------------------------------------------------*/ 1479 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1480 MatGetRow_MPIAIJ, 1481 MatRestoreRow_MPIAIJ, 1482 MatMult_MPIAIJ, 1483 MatMultAdd_MPIAIJ, 1484 MatMultTrans_MPIAIJ, 1485 MatMultTransAdd_MPIAIJ, 1486 0, 1487 0, 1488 0, 1489 0, 1490 0, 1491 0, 1492 MatRelax_MPIAIJ, 1493 MatTranspose_MPIAIJ, 1494 MatGetInfo_MPIAIJ, 1495 MatEqual_MPIAIJ, 1496 MatGetDiagonal_MPIAIJ, 1497 MatDiagonalScale_MPIAIJ, 1498 MatNorm_MPIAIJ, 1499 MatAssemblyBegin_MPIAIJ, 1500 MatAssemblyEnd_MPIAIJ, 1501 0, 1502 MatSetOption_MPIAIJ, 1503 MatZeroEntries_MPIAIJ, 1504 MatZeroRows_MPIAIJ, 1505 0, 1506 0, 1507 0, 1508 0, 1509 MatGetSize_MPIAIJ, 1510 MatGetLocalSize_MPIAIJ, 1511 MatGetOwnershipRange_MPIAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 MatDuplicate_MPIAIJ, 1517 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 MatGetSubMatrices_MPIAIJ, 1523 MatIncreaseOverlap_MPIAIJ, 1524 MatGetValues_MPIAIJ, 1525 MatCopy_MPIAIJ, 1526 MatPrintHelp_MPIAIJ, 1527 MatScale_MPIAIJ, 1528 0, 1529 0, 1530 0, 1531 MatGetBlockSize_MPIAIJ, 1532 0, 1533 0, 1534 0, 1535 0, 1536 MatFDColoringCreate_MPIAIJ, 1537 0, 1538 MatSetUnfactored_MPIAIJ, 1539 0, 1540 0, 1541 MatGetSubMatrix_MPIAIJ, 1542 0, 1543 0, 1544 MatGetMaps_Petsc}; 1545 1546 /* ----------------------------------------------------------------------------------------*/ 1547 1548 EXTERN_C_BEGIN 1549 #undef __FUNC__ 1550 #define __FUNC__ "MatStoreValues_MPIAIJ" 1551 int MatStoreValues_MPIAIJ(Mat mat) 1552 { 1553 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1554 int ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1558 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 EXTERN_C_END 1562 1563 EXTERN_C_BEGIN 1564 #undef __FUNC__ 1565 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1566 int MatRetrieveValues_MPIAIJ(Mat mat) 1567 { 1568 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1569 int ierr; 1570 1571 PetscFunctionBegin; 1572 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1573 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1574 
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning
   across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the
   approximate storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, and so on, where m0,m1,m2,... are the values of the input
   parameter 'm' on each process.
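
   A minimal calling-sequence sketch (the local row count and the preallocation
   guesses of 5 diagonal-block and 2 off-diagonal-block nonzeros per row are
   purely illustrative):

.vb
      Mat A;
      int m = ...;     /* local rows, chosen by the application */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                             5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... MatSetValues() calls ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve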

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extracting the part corresponding to 
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor and r2 is the last row belonging 
   to this processor.  This is a square mxm matrix.  The remaining portion of 
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using the SeqAIJ format on one processor)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is 
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices.  For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are 
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store 
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and 
   hence the preallocation is perfect.
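
   As an illustrative sketch only (the variable names are hypothetical, and the
   entries must still be set with MatSetValues() before assembly), proc2 of the
   example above could create its piece of the matrix with

.vb
     int d_nnz[2] = {1,1}, o_nnz[2] = {4,4}, ierr;
     Mat A;

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,2,2,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ... set the two locally owned rows with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
   where each of the other processes makes the analogous (collective) call with
   its own m, n, d_nnz, and o_nnz taken from the tables above.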

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr,i,size,flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIAIJ",
                                    (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIAIJ",
                                    (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIAIJ",
                                    (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j, rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A    = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
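
/*
   Illustrative sketch (not part of the library): loading a parallel AIJ matrix from a
   PETSc binary file through the user-level MatLoad(), which dispatches to the routine
   above when the MATMPIAIJ type is requested.  The binary viewer is assumed to have
   been opened for reading elsewhere with the binary viewer routines of this release;
   names are hypothetical and error checking is abbreviated.

       Viewer viewer;    assumed already opened for reading on PETSC_COMM_WORLD
       Mat    A;
       int    ierr;

       ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
       ... use A ...
       ierr = MatDestroy(A);CHKERRQ(ierr);
*/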

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ in local
  storage, and then the end result obtained by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m, n, rstart, row, rend, nz, *cwork, size, rank, j;
  Mat        *local, M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal 
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
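
/*
   Illustrative sketch (not part of the library): extracting the locally owned rows and
   all columns of a parallel AIJ matrix A through the user-level MatGetSubMatrix(), which
   is assumed here to mirror the argument list of MatGetSubMatrix_MPIAIJ() above.  The
   variable names are hypothetical and error checking is abbreviated.

       int ierr,rstart,rend,Mg,Ng;
       IS  isrow,iscol;
       Mat Asub;

       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = MatGetSize(A,&Mg,&Ng);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,Ng,0,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
       ... later, with the same index sets, the composed "SubMatrix" storage can be reused ...
       ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&Asub);CHKERRQ(ierr);
*/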