#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.302 1999/10/01 21:21:16 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

extern int MatSetUpMultiply_MPIAIJ(Mat);
extern int DisAssemble_MPIAIJ(Mat);
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqAIJ(Mat);

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length (unless the CTABLE
   hash-table variant is compiled in).
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
/*
   CreateColmap_MPIAIJ_Private - Builds aij->colmap, mapping a global column
   index to 1 + its local column index in the off-diagonal block aij->B
   (0 in the array form means "column not present locally").

   Input Parameter:
.  mat - the MPIAIJ matrix; aij->garray[] (local-to-global column map of B)
         must already be set up.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,ierr;      /* n = number of columns actually stored in B */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table: stores garray[i]+1 -> i+1 so that 0 can mean "absent" */
  ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length N (global columns); entry is local index + 1, 0 = absent */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra nonzero slots allocated per row when a row overflows */
#define CHUNKSIZE 15

/*
   MatSetValues_SeqAIJ_A_Private - inline insertion of (row,col,value) into the
   diagonal block aij->A.  Relies on many locals declared by the caller
   (rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N,nonew,shift,ierr and the cached
   arrays aa,ai,aj,aimax,ailen).  If the row is full it reallocates the whole
   matrix storage by CHUNKSIZE and updates a->a/a->i/a->j in place.

   NOTE(review): the bisection below narrows low/high but the following scan
   starts at _i=0 and ignores them, so the search is effectively dead code;
   also it compares rp[t] against col while the scan uses col1 = col - shift
   (identical only when shift == 0) -- TODO confirm against the SeqAIJ version.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: single allocation holds a, j and i arrays */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows past the current one shift by CHUNKSIZE */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/*
   MatSetValues_SeqAIJ_B_Private - identical insertion logic for the
   off-diagonal block aij->B (caches ba,bi,bj,bimax,bilen).  Kept as a
   separate macro because it touches a different label (b_noinsert) and a
   different set of cached pointers.  Same review notes as the A variant.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts/adds a logically dense block of values.
   Locally owned rows go directly into the A (diagonal) or B (off-diagonal)
   block via the macros above; rows owned by other processors are stashed
   for communication at assembly time (unless aij->donotstash).
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative row indices are silently ignored */
#if \
defined(PETSC_USE_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is locally owned */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;   /* negative column indices are ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column lies in the off-diagonal block B */
          if (mat->was_assembled) {
            /* after an assembly B is compressed: translate the global column
               through colmap to B's local numbering */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;   /* -1 means column not present locally */
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-process column: fall back to uncompressed (global) numbering */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];   /* before first assembly B uses global column numbers */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another processor: stash for communication at assembly */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   MatGetValues_MPIAIJ - Retrieves values from locally owned rows only;
   requesting a row owned by another processor is an error.  Off-diagonal
   columns are translated through colmap; entries not stored locally are
   returned as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: map global column to B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* garray check guards against a stale/aliased colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - Verifies all processors agree on the insert mode
   and starts scattering stashed (off-processor) values to their owners.
   Collective; no-op when stashing is disabled.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - Receives stashed values and inserts them, then
   assembles the A and B blocks.  If any processor dis-assembled (and B's
   nonzero structure may change) all processors dis-assemble so they can
   reassemble consistently.  Also frees the MatGetRow work array.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* drain every incoming stash message */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
/*
   MatZeroEntries_MPIAIJ - Zeros all entries, keeping the nonzero structure,
   by zeroing both the diagonal and off-diagonal blocks.
*/
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   MatZeroRows_MPIAIJ - Zeros the (possibly non-local) rows listed in is,
   optionally placing diag on the diagonal.  Each processor routes the row
   indices it holds to the owning processors with nonblocking MPI sends,
   then zaps its own rows.  Collective.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr =
PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 procs = nprocs + size; 440 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 441 for ( i=0; i<N; i++ ) { 442 idx = rows[i]; 443 found = 0; 444 for ( j=0; j<size; j++ ) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 450 } 451 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 452 453 /* inform other processors of number of messages and max length*/ 454 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 455 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 456 nrecvs = work[rank]; 457 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 458 nmax = work[rank]; 459 ierr = PetscFree(work);CHKERRQ(ierr); 460 461 /* post receives: */ 462 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 463 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 464 for ( i=0; i<nrecvs; i++ ) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 473 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 474 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 475 starts[0] = 0; 476 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 477 for ( i=0; i<N; i++ ) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ISRestoreIndices(is,&rows); 481 482 starts[0] = 0; 483 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 484 count = 0; 485 for ( i=0; i<size; i++ ) { 486 if (procs[i]) { 
487 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 511 count = 0; 512 for ( i=0; i<nrecvs; i++ ) { 513 values = rvalues + i*nmax; 514 for ( j=0; j<lens[i]; j++ ) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for ( i = 0; i < slen; i++ ) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNC__ 570 #define __FUNC__ "MatMult_MPIAIJ" 571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 574 int ierr,nt; 575 576 PetscFunctionBegin; 577 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 578 if (nt != a->n) { 579 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 580 } 581 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 582 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 583 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNC__ 589 #define __FUNC__ "MatMultAdd_MPIAIJ" 590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 591 { 592 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 593 int ierr; 594 595 PetscFunctionBegin; 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNC__ 604 #define __FUNC__ "MatMultTrans_MPIAIJ" 605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 /* do nondiagonal part */ 612 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 613 /* send it on its way */ 614 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 /* do local part */ 616 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 617 /* receive remote parts: note this assumes the values are not actually */ 618 /* inserted in yy until the next line, which is true for my implementation*/ 619 /* but is not perhaps always true. 
*/ 620 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNC__ 625 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 627 { 628 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 629 int ierr; 630 631 PetscFunctionBegin; 632 /* do nondiagonal part */ 633 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 634 /* send it on its way */ 635 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 /* do local part */ 637 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 638 /* receive remote parts: note this assumes the values are not actually */ 639 /* inserted in yy until the next line, which is true for my implementation*/ 640 /* but is not perhaps always true. */ 641 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 642 PetscFunctionReturn(0); 643 } 644 645 /* 646 This only works correctly for square matrices where the subblock A->A is the 647 diagonal block 648 */ 649 #undef __FUNC__ 650 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 652 { 653 int ierr; 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 656 PetscFunctionBegin; 657 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 658 if (a->rstart != a->cstart || a->rend != a->cend) { 659 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 660 } 661 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatScale_MPIAIJ" 667 int MatScale_MPIAIJ(Scalar *aa,Mat A) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 PetscFunctionBegin; 673 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 674 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNC__ 679 #define __FUNC__ 
"MatDestroy_MPIAIJ" 680 int MatDestroy_MPIAIJ(Mat mat) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 683 int ierr; 684 685 PetscFunctionBegin; 686 687 if (mat->mapping) { 688 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 689 } 690 if (mat->bmapping) { 691 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 692 } 693 if (mat->rmap) { 694 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 695 } 696 if (mat->cmap) { 697 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_USE_LOG) 700 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 701 #endif 702 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 703 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 704 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 705 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 706 #if defined (PETSC_USE_CTABLE) 707 if (aij->colmap) TableDelete(aij->colmap); 708 #else 709 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 710 #endif 711 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 712 if (aij->lvec) VecDestroy(aij->lvec); 713 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 714 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 715 ierr = PetscFree(aij);CHKERRQ(ierr); 716 PLogObjectDestroy(mat); 717 PetscHeaderDestroy(mat); 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNC__ 722 #define __FUNC__ "MatView_MPIAIJ_Binary" 723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 726 int ierr; 727 728 PetscFunctionBegin; 729 if (aij->size == 1) { 730 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 731 } 732 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNC__ 737 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 738 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 739 { 740 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 741 Mat_SeqAIJ* C = 
(Mat_SeqAIJ*)aij->A->data; 742 int ierr, format,shift = C->indexshift,rank = aij->rank ; 743 int size = aij->size; 744 FILE *fd; 745 746 PetscFunctionBegin; 747 if (PetscTypeCompare(viewer,ASCII_VIEWER)) { 748 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 749 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 750 MatInfo info; 751 int flg; 752 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 753 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 754 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 755 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 756 ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr); 757 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 758 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 759 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 760 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 761 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 762 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 763 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 764 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 765 fflush(fd); 766 ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr); 767 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 770 PetscFunctionReturn(0); 771 } 772 } else if (PetscTypeCompare(viewer,DRAW_VIEWER)) { 773 Draw draw; 774 PetscTruth isnull; 775 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 776 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 777 } 778 779 if (size == 1) { 780 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 781 } else { 782 /* assemble the entire matrix onto first processor. 
*/ 783 Mat A; 784 Mat_SeqAIJ *Aloc; 785 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 786 Scalar *a; 787 788 if (!rank) { 789 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 790 } else { 791 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } 793 PLogObjectParent(mat,A); 794 795 /* copy over the A part */ 796 Aloc = (Mat_SeqAIJ*) aij->A->data; 797 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 798 row = aij->rstart; 799 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 800 for ( i=0; i<m; i++ ) { 801 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 802 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 803 } 804 aj = Aloc->j; 805 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 806 807 /* copy over the B part */ 808 Aloc = (Mat_SeqAIJ*) aij->B->data; 809 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 810 row = aij->rstart; 811 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 812 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 813 for ( i=0; i<m; i++ ) { 814 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 815 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 816 } 817 ierr = PetscFree(ct);CHKERRQ(ierr); 818 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 820 /* 821 Everyone has to call to draw the matrix since the graphics waits are 822 synchronized across all processors that share the Draw object 823 */ 824 if (!rank || PetscTypeCompare(viewer,DRAW_VIEWER)) { 825 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 826 } 827 ierr = MatDestroy(A);CHKERRQ(ierr); 828 } 829 PetscFunctionReturn(0); 830 } 831 832 #undef __FUNC__ 833 #define __FUNC__ "MatView_MPIAIJ" 834 int MatView_MPIAIJ(Mat mat,Viewer viewer) 835 { 836 int ierr; 837 838 
PetscFunctionBegin; 839 if (PetscTypeCompare(viewer,ASCII_VIEWER) || PetscTypeCompare(viewer,DRAW_VIEWER) || 840 PetscTypeCompare(viewer,SOCKET_VIEWER) || PetscTypeCompare(viewer,BINARY_VIEWER)) { 841 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 842 /* 843 } else if (PetscTypeCompare(viewer,BINARY_VIEWER)) { 844 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 845 */ 846 } else { 847 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 848 } 849 PetscFunctionReturn(0); 850 } 851 852 /* 853 This has to provide several versions. 854 855 2) a) use only local smoothing updating outer values only once. 856 b) local smoothing updating outer values each inner iteration 857 3) color updating out values betwen colors. 858 */ 859 #undef __FUNC__ 860 #define __FUNC__ "MatRelax_MPIAIJ" 861 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 862 double fshift,int its,Vec xx) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 865 Mat AA = mat->A, BB = mat->B; 866 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 867 Scalar *b,*x,*xs,*ls,d,*v,sum; 868 int ierr,*idx, *diag; 869 int n = mat->n, m = mat->m, i,shift = A->indexshift; 870 871 PetscFunctionBegin; 872 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 873 diag = A->diag; 874 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 875 if (flag & SOR_ZERO_INITIAL_GUESS) { 876 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 877 PetscFunctionReturn(0); 878 } 879 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 880 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 881 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 882 if (xx != bb) { 883 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 884 } else { 885 b = x; 886 } 887 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 888 xs = x + shift; /* shift by one for index start of 1 */ 
889 ls = ls + shift; 890 while (its--) { 891 /* go down through the rows */ 892 for ( i=0; i<m; i++ ) { 893 n = A->i[i+1] - A->i[i]; 894 PLogFlops(4*n+3); 895 idx = A->j + A->i[i] + shift; 896 v = A->a + A->i[i] + shift; 897 sum = b[i]; 898 SPARSEDENSEMDOT(sum,xs,v,idx,n); 899 d = fshift + A->a[diag[i]+shift]; 900 n = B->i[i+1] - B->i[i]; 901 idx = B->j + B->i[i] + shift; 902 v = B->a + B->i[i] + shift; 903 SPARSEDENSEMDOT(sum,ls,v,idx,n); 904 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 905 } 906 /* come up through the rows */ 907 for ( i=m-1; i>-1; i-- ) { 908 n = A->i[i+1] - A->i[i]; 909 PLogFlops(4*n+3) 910 idx = A->j + A->i[i] + shift; 911 v = A->a + A->i[i] + shift; 912 sum = b[i]; 913 SPARSEDENSEMDOT(sum,xs,v,idx,n); 914 d = fshift + A->a[diag[i]+shift]; 915 n = B->i[i+1] - B->i[i]; 916 idx = B->j + B->i[i] + shift; 917 v = B->a + B->i[i] + shift; 918 SPARSEDENSEMDOT(sum,ls,v,idx,n); 919 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 920 } 921 } 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 924 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 925 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 926 if (flag & SOR_ZERO_INITIAL_GUESS) { 927 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 931 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 932 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 933 if (xx != bb) { 934 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 935 } else { 936 b = x; 937 } 938 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 939 xs = x + shift; /* shift by one for index start of 1 */ 940 ls = ls + shift; 941 while (its--) { 942 for ( i=0; i<m; i++ ) { 943 n = A->i[i+1] - A->i[i]; 944 PLogFlops(4*n+3); 945 idx = A->j + A->i[i] + shift; 946 v = 
A->a + A->i[i] + shift; 947 sum = b[i]; 948 SPARSEDENSEMDOT(sum,xs,v,idx,n); 949 d = fshift + A->a[diag[i]+shift]; 950 n = B->i[i+1] - B->i[i]; 951 idx = B->j + B->i[i] + shift; 952 v = B->a + B->i[i] + shift; 953 SPARSEDENSEMDOT(sum,ls,v,idx,n); 954 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 955 } 956 } 957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 958 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 959 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 960 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 961 if (flag & SOR_ZERO_INITIAL_GUESS) { 962 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 966 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 968 if (xx != bb) { 969 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 970 } else { 971 b = x; 972 } 973 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 974 xs = x + shift; /* shift by one for index start of 1 */ 975 ls = ls + shift; 976 while (its--) { 977 for ( i=m-1; i>-1; i-- ) { 978 n = A->i[i+1] - A->i[i]; 979 PLogFlops(4*n+3); 980 idx = A->j + A->i[i] + shift; 981 v = A->a + A->i[i] + shift; 982 sum = b[i]; 983 SPARSEDENSEMDOT(sum,xs,v,idx,n); 984 d = fshift + A->a[diag[i]+shift]; 985 n = B->i[i+1] - B->i[i]; 986 idx = B->j + B->i[i] + shift; 987 v = B->a + B->i[i] + shift; 988 SPARSEDENSEMDOT(sum,ls,v,idx,n); 989 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 990 } 991 } 992 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 993 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 994 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 995 } else { 996 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNC__ 1002 #define __FUNC__ "MatGetInfo_MPIAIJ" 1003 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1004 { 1005 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1006 Mat A = mat->A, B = mat->B; 1007 int ierr; 1008 double isend[5], irecv[5]; 1009 1010 PetscFunctionBegin; 1011 info->block_size = 1.0; 1012 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1013 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1014 isend[3] = info->memory; isend[4] = info->mallocs; 1015 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1016 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1017 isend[3] += info->memory; isend[4] += info->mallocs; 1018 if (flag == MAT_LOCAL) { 1019 info->nz_used = isend[0]; 1020 info->nz_allocated = isend[1]; 1021 info->nz_unneeded = isend[2]; 1022 info->memory = isend[3]; 1023 info->mallocs = isend[4]; 1024 } else if (flag == MAT_GLOBAL_MAX) { 1025 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1026 info->nz_used = irecv[0]; 1027 info->nz_allocated = irecv[1]; 1028 info->nz_unneeded = irecv[2]; 1029 info->memory = irecv[3]; 1030 info->mallocs = irecv[4]; 1031 } else if (flag == MAT_GLOBAL_SUM) { 1032 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1033 info->nz_used = irecv[0]; 1034 info->nz_allocated = irecv[1]; 1035 info->nz_unneeded = irecv[2]; 1036 info->memory = irecv[3]; 1037 info->mallocs = irecv[4]; 1038 } 1039 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1040 info->fill_ratio_needed = 0; 1041 
info->factor_mallocs = 0; 1042 info->rows_global = (double)mat->M; 1043 info->columns_global = (double)mat->N; 1044 info->rows_local = (double)mat->m; 1045 info->columns_local = (double)mat->N; 1046 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNC__ 1051 #define __FUNC__ "MatSetOption_MPIAIJ" 1052 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1055 int ierr; 1056 1057 PetscFunctionBegin; 1058 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1059 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1060 op == MAT_COLUMNS_UNSORTED || 1061 op == MAT_COLUMNS_SORTED || 1062 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1063 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1064 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1065 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1066 } else if (op == MAT_ROW_ORIENTED) { 1067 a->roworiented = 1; 1068 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1069 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1070 } else if (op == MAT_ROWS_SORTED || 1071 op == MAT_ROWS_UNSORTED || 1072 op == MAT_SYMMETRIC || 1073 op == MAT_STRUCTURALLY_SYMMETRIC || 1074 op == MAT_YES_NEW_DIAGONALS) 1075 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1076 else if (op == MAT_COLUMN_ORIENTED) { 1077 a->roworiented = 0; 1078 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1079 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1080 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1081 a->donotstash = 1; 1082 } else if (op == MAT_NO_NEW_DIAGONALS){ 1083 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1084 } else { 1085 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNC__ 1091 #define __FUNC__ "MatGetSize_MPIAIJ" 1092 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1093 { 1094 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1095 1096 PetscFunctionBegin; 1097 if (m) *m = mat->M; 1098 if (n) *n = mat->N; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNC__ 1103 #define __FUNC__ 
"MatGetLocalSize_MPIAIJ" 1104 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1105 { 1106 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1107 1108 PetscFunctionBegin; 1109 if (m) *m = mat->m; 1110 if (n) *n = mat->n; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1116 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1117 { 1118 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1119 1120 PetscFunctionBegin; 1121 *m = mat->rstart; *n = mat->rend; 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNC__ 1126 #define __FUNC__ "MatGetRow_MPIAIJ" 1127 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1128 { 1129 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1130 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1131 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1132 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1133 int *cmap, *idx_p; 1134 1135 PetscFunctionBegin; 1136 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1137 mat->getrowactive = PETSC_TRUE; 1138 1139 if (!mat->rowvalues && (idx || v)) { 1140 /* 1141 allocate enough space to hold information from the longest row. 
1142 */ 1143 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1144 int max = 1,m = mat->m,tmp; 1145 for ( i=0; i<m; i++ ) { 1146 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1147 if (max < tmp) { max = tmp; } 1148 } 1149 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1150 mat->rowindices = (int *) (mat->rowvalues + max); 1151 } 1152 1153 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1154 lrow = row - rstart; 1155 1156 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1157 if (!v) {pvA = 0; pvB = 0;} 1158 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1159 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1160 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1161 nztot = nzA + nzB; 1162 1163 cmap = mat->garray; 1164 if (v || idx) { 1165 if (nztot) { 1166 /* Sort by increasing column numbers, assuming A and B already sorted */ 1167 int imark = -1; 1168 if (v) { 1169 *v = v_p = mat->rowvalues; 1170 for ( i=0; i<nzB; i++ ) { 1171 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1172 else break; 1173 } 1174 imark = i; 1175 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1176 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1177 } 1178 if (idx) { 1179 *idx = idx_p = mat->rowindices; 1180 if (imark > -1) { 1181 for ( i=0; i<imark; i++ ) { 1182 idx_p[i] = cmap[cworkB[i]]; 1183 } 1184 } else { 1185 for ( i=0; i<nzB; i++ ) { 1186 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1187 else break; 1188 } 1189 imark = i; 1190 } 1191 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1192 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1193 } 1194 } else { 1195 if (idx) *idx = 0; 1196 if (v) *v = 0; 1197 } 1198 } 1199 *nz = nztot; 1200 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1201 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1207 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1208 { 1209 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1210 1211 PetscFunctionBegin; 1212 if (aij->getrowactive == PETSC_FALSE) { 1213 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1214 } 1215 aij->getrowactive = PETSC_FALSE; 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNC__ 1220 #define __FUNC__ "MatNorm_MPIAIJ" 1221 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1222 { 1223 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1224 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1225 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1226 double sum = 0.0; 1227 Scalar *v; 1228 1229 PetscFunctionBegin; 1230 if (aij->size == 1) { 1231 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1232 } else { 1233 if (type == NORM_FROBENIUS) { 1234 v = amat->a; 1235 for (i=0; i<amat->nz; i++ ) { 1236 #if defined(PETSC_USE_COMPLEX) 1237 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1238 #else 1239 sum += (*v)*(*v); v++; 1240 #endif 1241 } 1242 v = bmat->a; 1243 for (i=0; i<bmat->nz; i++ ) { 1244 #if defined(PETSC_USE_COMPLEX) 1245 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1246 #else 1247 sum += (*v)*(*v); v++; 1248 #endif 1249 } 1250 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1251 *norm = sqrt(*norm); 1252 } else if (type == NORM_1) { /* max column norm */ 1253 double *tmp, *tmp2; 1254 int *jj, *garray = aij->garray; 1255 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1256 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1257 ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 1258 *norm = 0.0; 1259 v = amat->a; jj = amat->j; 1260 for ( j=0; j<amat->nz; j++ ) { 1261 
tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1262 } 1263 v = bmat->a; jj = bmat->j; 1264 for ( j=0; j<bmat->nz; j++ ) { 1265 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1266 } 1267 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1268 for ( j=0; j<aij->N; j++ ) { 1269 if (tmp2[j] > *norm) *norm = tmp2[j]; 1270 } 1271 ierr = PetscFree(tmp);CHKERRQ(ierr); 1272 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1273 } else if (type == NORM_INFINITY) { /* max row norm */ 1274 double ntemp = 0.0; 1275 for ( j=0; j<amat->m; j++ ) { 1276 v = amat->a + amat->i[j] + shift; 1277 sum = 0.0; 1278 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1279 sum += PetscAbsScalar(*v); v++; 1280 } 1281 v = bmat->a + bmat->i[j] + shift; 1282 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1283 sum += PetscAbsScalar(*v); v++; 1284 } 1285 if (sum > ntemp) ntemp = sum; 1286 } 1287 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1288 } else { 1289 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1290 } 1291 } 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNC__ 1296 #define __FUNC__ "MatTranspose_MPIAIJ" 1297 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1298 { 1299 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1300 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1301 int ierr,shift = Aloc->indexshift; 1302 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1303 Mat B; 1304 Scalar *array; 1305 1306 PetscFunctionBegin; 1307 if (matout == PETSC_NULL && M != N) { 1308 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1309 } 1310 1311 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1312 1313 /* copy over the A part */ 1314 Aloc = (Mat_SeqAIJ*) a->A->data; 1315 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1316 row = a->rstart; 1317 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1318 for ( i=0; i<m; i++ ) { 1319 ierr = 
MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1320 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1321 } 1322 aj = Aloc->j; 1323 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1324 1325 /* copy over the B part */ 1326 Aloc = (Mat_SeqAIJ*) a->B->data; 1327 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1328 row = a->rstart; 1329 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1330 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1331 for ( i=0; i<m; i++ ) { 1332 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1333 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1334 } 1335 ierr = PetscFree(ct);CHKERRQ(ierr); 1336 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1337 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1338 if (matout != PETSC_NULL) { 1339 *matout = B; 1340 } else { 1341 PetscOps *Abops; 1342 struct _MatOps *Aops; 1343 1344 /* This isn't really an in-place transpose .... but free data structures from a */ 1345 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1346 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1347 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1348 #if defined (PETSC_USE_CTABLE) 1349 if (a->colmap) TableDelete(a->colmap); 1350 #else 1351 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1352 #endif 1353 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1354 if (a->lvec) VecDestroy(a->lvec); 1355 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1356 ierr = PetscFree(a);CHKERRQ(ierr); 1357 1358 /* 1359 This is horrible, horrible code. We need to keep the 1360 A pointers for the bops and ops but copy everything 1361 else from C. 
1362 */ 1363 Abops = A->bops; 1364 Aops = A->ops; 1365 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1366 A->bops = Abops; 1367 A->ops = Aops; 1368 PetscHeaderDestroy(B); 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNC__ 1374 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1375 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1376 { 1377 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1378 Mat a = aij->A, b = aij->B; 1379 int ierr,s1,s2,s3; 1380 1381 PetscFunctionBegin; 1382 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1383 if (rr) { 1384 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1385 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1386 /* Overlap communication with computation. */ 1387 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1388 } 1389 if (ll) { 1390 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1391 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1392 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1393 } 1394 /* scale the diagonal block */ 1395 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1396 1397 if (rr) { 1398 /* Do a scatter end and then right scale the off-diagonal block */ 1399 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1400 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1401 } 1402 1403 PetscFunctionReturn(0); 1404 } 1405 1406 1407 #undef __FUNC__ 1408 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1409 int MatPrintHelp_MPIAIJ(Mat A) 1410 { 1411 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1412 int ierr; 1413 1414 PetscFunctionBegin; 1415 if (!a->rank) { 1416 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNC__ 1422 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1423 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1424 { 1425 PetscFunctionBegin; 1426 *bs = 1; 1427 
PetscFunctionReturn(0); 1428 } 1429 #undef __FUNC__ 1430 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1431 int MatSetUnfactored_MPIAIJ(Mat A) 1432 { 1433 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNC__ 1442 #define __FUNC__ "MatEqual_MPIAIJ" 1443 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1444 { 1445 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1446 Mat a, b, c, d; 1447 PetscTruth flg; 1448 int ierr; 1449 1450 PetscFunctionBegin; 1451 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1452 a = matA->A; b = matA->B; 1453 c = matB->A; d = matB->B; 1454 1455 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1456 if (flg == PETSC_TRUE) { 1457 ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1458 } 1459 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNC__ 1464 #define __FUNC__ "MatCopy_MPIAIJ" 1465 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1466 { 1467 int ierr; 1468 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1469 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1470 1471 PetscFunctionBegin; 1472 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1473 /* because of the column compression in the off-processor part of the matrix a->B, 1474 the number of columns in a->B and b->B may be different, hence we cannot call 1475 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1476 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1477 then copying the submatrices */ 1478 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1479 } else { 1480 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1481 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1487 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1488 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1489 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1490 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1491 1492 /* -------------------------------------------------------------------*/ 1493 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1494 MatGetRow_MPIAIJ, 1495 MatRestoreRow_MPIAIJ, 1496 MatMult_MPIAIJ, 1497 MatMultAdd_MPIAIJ, 1498 MatMultTrans_MPIAIJ, 1499 MatMultTransAdd_MPIAIJ, 1500 0, 1501 0, 1502 0, 1503 0, 1504 0, 1505 0, 1506 MatRelax_MPIAIJ, 1507 MatTranspose_MPIAIJ, 1508 MatGetInfo_MPIAIJ, 1509 MatEqual_MPIAIJ, 1510 MatGetDiagonal_MPIAIJ, 1511 MatDiagonalScale_MPIAIJ, 1512 MatNorm_MPIAIJ, 1513 MatAssemblyBegin_MPIAIJ, 1514 MatAssemblyEnd_MPIAIJ, 1515 0, 1516 MatSetOption_MPIAIJ, 1517 MatZeroEntries_MPIAIJ, 1518 MatZeroRows_MPIAIJ, 1519 0, 1520 0, 1521 0, 1522 0, 1523 MatGetSize_MPIAIJ, 1524 MatGetLocalSize_MPIAIJ, 1525 MatGetOwnershipRange_MPIAIJ, 1526 0, 1527 0, 1528 0, 1529 0, 1530 MatDuplicate_MPIAIJ, 1531 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 MatGetSubMatrices_MPIAIJ, 1537 MatIncreaseOverlap_MPIAIJ, 1538 MatGetValues_MPIAIJ, 1539 MatCopy_MPIAIJ, 1540 MatPrintHelp_MPIAIJ, 1541 MatScale_MPIAIJ, 1542 0, 1543 0, 1544 0, 1545 MatGetBlockSize_MPIAIJ, 1546 0, 1547 0, 1548 0, 1549 0, 1550 MatFDColoringCreate_MPIAIJ, 1551 0, 1552 MatSetUnfactored_MPIAIJ, 1553 0, 1554 0, 1555 MatGetSubMatrix_MPIAIJ, 1556 0, 1557 0, 1558 
MatGetMaps_Petsc}; 1559 1560 /* ----------------------------------------------------------------------------------------*/ 1561 1562 EXTERN_C_BEGIN 1563 #undef __FUNC__ 1564 #define __FUNC__ "MatStoreValues_MPIAIJ" 1565 int MatStoreValues_MPIAIJ(Mat mat) 1566 { 1567 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1568 int ierr; 1569 1570 PetscFunctionBegin; 1571 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1572 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 EXTERN_C_END 1576 1577 EXTERN_C_BEGIN 1578 #undef __FUNC__ 1579 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1580 int MatRetrieveValues_MPIAIJ(Mat mat) 1581 { 1582 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1583 int ierr; 1584 1585 PetscFunctionBegin; 1586 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1587 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1588 PetscFunctionReturn(0); 1589 } 1590 EXTERN_C_END 1591 1592 #include "pc.h" 1593 EXTERN_C_BEGIN 1594 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1595 EXTERN_C_END 1596 1597 #undef __FUNC__ 1598 #define __FUNC__ "MatCreateMPIAIJ" 1599 /*@C 1600 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1601 (the default parallel PETSc format). For good matrix assembly performance 1602 the user should preallocate the matrix storage by setting the parameters 1603 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1604 performance can be increased by more than a factor of 50. 1605 1606 Collective on MPI_Comm 1607 1608 Input Parameters: 1609 + comm - MPI communicator 1610 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1611 This value should be the same as the local size used in creating the 1612 y vector for the matrix-vector product y = Ax. 1613 . n - This value should be the same as the local size used in creating the 1614 x vector for the matrix-vector product y = Ax. 
 (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix 

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor then it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extracting the part corresponding 
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor, and r2 is the last row belonging 
   to this processor. This is a square mxm matrix. The remaining portion of 
   the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using SeqAIJ format on one processor)


   Example usage:
  
   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0 
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local 
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 1738 34 values. 1739 1740 When d_nnz, o_nnz parameters are specified, the storage is specified 1741 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1742 In the above case the values for d_nnz,o_nnz are: 1743 .vb 1744 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1745 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1746 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1747 .ve 1748 Here the space allocated is sum of all the above values i.e 34, and 1749 hence pre-allocation is perfect. 1750 1751 Level: intermediate 1752 1753 .keywords: matrix, aij, compressed row, sparse, parallel 1754 1755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1756 @*/ 1757 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1758 { 1759 Mat B; 1760 Mat_MPIAIJ *b; 1761 int ierr, i,size,flag1 = 0, flag2 = 0; 1762 1763 PetscFunctionBegin; 1764 1765 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 1766 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 1767 if (d_nnz) { 1768 for (i=0; i<m; i++) { 1769 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1770 } 1771 } 1772 if (o_nnz) { 1773 for (i=0; i<m; i++) { 1774 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1775 } 1776 } 1777 1778 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1779 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr); 1780 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1781 if (!flag1 && !flag2 && size == 1) { 1782 if (M == PETSC_DECIDE) M = m; 1783 if (N == PETSC_DECIDE) N = n; 1784 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1785 PetscFunctionReturn(0); 1786 } 1787 1788 *A = 0; 1789 
PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1790 PLogObjectCreate(B); 1791 B->data = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1792 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1793 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1794 B->ops->destroy = MatDestroy_MPIAIJ; 1795 B->ops->view = MatView_MPIAIJ; 1796 B->factor = 0; 1797 B->assembled = PETSC_FALSE; 1798 B->mapping = 0; 1799 1800 B->insertmode = NOT_SET_VALUES; 1801 b->size = size; 1802 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1803 1804 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1805 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1806 } 1807 1808 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1809 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1810 b->m = m; B->m = m; 1811 b->n = n; B->n = n; 1812 b->N = N; B->N = N; 1813 b->M = M; B->M = M; 1814 1815 /* the information in the maps duplicates the information computed below, eventually 1816 we should remove the duplicate information that is not contained in the maps */ 1817 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1818 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1819 1820 /* build local table of row and column ownerships */ 1821 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1822 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1823 b->cowners = b->rowners + b->size + 2; 1824 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1825 b->rowners[0] = 0; 1826 for ( i=2; i<=b->size; i++ ) { 1827 b->rowners[i] += b->rowners[i-1]; 1828 } 1829 b->rstart = b->rowners[b->rank]; 1830 b->rend = b->rowners[b->rank+1]; 1831 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1832 b->cowners[0] = 0; 1833 for ( i=2; i<=b->size; i++ ) { 
1834 b->cowners[i] += b->cowners[i-1]; 1835 } 1836 b->cstart = b->cowners[b->rank]; 1837 b->cend = b->cowners[b->rank+1]; 1838 1839 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1840 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1841 PLogObjectParent(B,b->A); 1842 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1843 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1844 PLogObjectParent(B,b->B); 1845 1846 /* build cache for off array entries formed */ 1847 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 1848 b->donotstash = 0; 1849 b->colmap = 0; 1850 b->garray = 0; 1851 b->roworiented = 1; 1852 1853 /* stuff used for matrix vector multiply */ 1854 b->lvec = 0; 1855 b->Mvctx = 0; 1856 1857 /* stuff for MatGetRow() */ 1858 b->rowindices = 0; 1859 b->rowvalues = 0; 1860 b->getrowactive = PETSC_FALSE; 1861 1862 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1863 "MatStoreValues_MPIAIJ", 1864 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1865 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1866 "MatRetrieveValues_MPIAIJ", 1867 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1868 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 1869 "MatGetDiagonalBlock_MPIAIJ", 1870 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1871 *A = B; 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNC__ 1876 #define __FUNC__ "MatDuplicate_MPIAIJ" 1877 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1878 { 1879 Mat mat; 1880 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1881 int ierr, len=0, flg; 1882 1883 PetscFunctionBegin; 1884 *newmat = 0; 1885 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1886 PLogObjectCreate(mat); 1887 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a); 1888 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1889 
mat->ops->destroy = MatDestroy_MPIAIJ; 1890 mat->ops->view = MatView_MPIAIJ; 1891 mat->factor = matin->factor; 1892 mat->assembled = PETSC_TRUE; 1893 mat->insertmode = NOT_SET_VALUES; 1894 1895 a->m = mat->m = oldmat->m; 1896 a->n = mat->n = oldmat->n; 1897 a->M = mat->M = oldmat->M; 1898 a->N = mat->N = oldmat->N; 1899 1900 a->rstart = oldmat->rstart; 1901 a->rend = oldmat->rend; 1902 a->cstart = oldmat->cstart; 1903 a->cend = oldmat->cend; 1904 a->size = oldmat->size; 1905 a->rank = oldmat->rank; 1906 a->donotstash = oldmat->donotstash; 1907 a->roworiented = oldmat->roworiented; 1908 a->rowindices = 0; 1909 a->rowvalues = 0; 1910 a->getrowactive = PETSC_FALSE; 1911 1912 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1913 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1914 a->cowners = a->rowners + a->size + 2; 1915 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1916 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1917 if (oldmat->colmap) { 1918 #if defined (PETSC_USE_CTABLE) 1919 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1920 #else 1921 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1922 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1923 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1924 #endif 1925 } else a->colmap = 0; 1926 if (oldmat->garray) { 1927 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1928 a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1929 PLogObjectMemory(mat,len*sizeof(int)); 1930 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1931 } else a->garray = 0; 1932 1933 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1934 PLogObjectParent(mat,a->lvec); 1935 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1936 PLogObjectParent(mat,a->Mvctx); 1937 ierr 
= MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1938 PLogObjectParent(mat,a->A); 1939 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1940 PLogObjectParent(mat,a->B); 1941 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1942 if (flg) { 1943 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1944 } 1945 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1946 *newmat = mat; 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #include "sys.h" 1951 1952 #undef __FUNC__ 1953 #define __FUNC__ "MatLoad_MPIAIJ" 1954 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1955 { 1956 Mat A; 1957 Scalar *vals,*svals; 1958 MPI_Comm comm = ((PetscObject)viewer)->comm; 1959 MPI_Status status; 1960 int i, nz, ierr, j,rstart, rend, fd; 1961 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1962 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1963 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1964 1965 PetscFunctionBegin; 1966 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1967 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1968 if (!rank) { 1969 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1970 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1971 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1972 if (header[3] < 0) { 1973 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1974 } 1975 } 1976 1977 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1978 M = header[1]; N = header[2]; 1979 /* determine ownership of all rows */ 1980 m = M/size + ((M % size) > rank); 1981 rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1982 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1983 rowners[0] = 0; 1984 for ( i=2; i<=size; i++ ) { 1985 rowners[i] += rowners[i-1]; 1986 } 1987 rstart = rowners[rank]; 1988 rend = rowners[rank+1]; 1989 1990 
/* distribute row lengths to all processors */ 1991 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 1992 offlens = ourlens + (rend-rstart); 1993 if (!rank) { 1994 rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 1995 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1996 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 1997 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1998 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1999 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2000 } else { 2001 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2002 } 2003 2004 if (!rank) { 2005 /* calculate the number of nonzeros on each processor */ 2006 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2007 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2008 for ( i=0; i<size; i++ ) { 2009 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2010 procsnz[i] += rowlengths[j]; 2011 } 2012 } 2013 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2014 2015 /* determine max buffer needed and allocate it */ 2016 maxnz = 0; 2017 for ( i=0; i<size; i++ ) { 2018 maxnz = PetscMax(maxnz,procsnz[i]); 2019 } 2020 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2021 2022 /* read in my part of the matrix column indices */ 2023 nz = procsnz[0]; 2024 mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2025 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2026 2027 /* read in every one elses and ship off */ 2028 for ( i=1; i<size; i++ ) { 2029 nz = procsnz[i]; 2030 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2031 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2032 } 2033 ierr = PetscFree(cols);CHKERRQ(ierr); 2034 } else { 2035 /* determine buffer space needed for message */ 2036 nz = 0; 2037 for ( i=0; i<m; i++ ) { 2038 
nz += ourlens[i]; 2039 } 2040 mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2041 2042 /* receive message of column indices*/ 2043 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2044 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2045 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2046 } 2047 2048 /* determine column ownership if matrix is not square */ 2049 if (N != M) { 2050 n = N/size + ((N % size) > rank); 2051 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2052 cstart = cend - n; 2053 } else { 2054 cstart = rstart; 2055 cend = rend; 2056 n = cend - cstart; 2057 } 2058 2059 /* loop over local rows, determining number of off diagonal entries */ 2060 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2061 jj = 0; 2062 for ( i=0; i<m; i++ ) { 2063 for ( j=0; j<ourlens[i]; j++ ) { 2064 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2065 jj++; 2066 } 2067 } 2068 2069 /* create our matrix */ 2070 for ( i=0; i<m; i++ ) { 2071 ourlens[i] -= offlens[i]; 2072 } 2073 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2074 A = *newmat; 2075 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2076 for ( i=0; i<m; i++ ) { 2077 ourlens[i] += offlens[i]; 2078 } 2079 2080 if (!rank) { 2081 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 2082 2083 /* read in my part of the matrix numerical values */ 2084 nz = procsnz[0]; 2085 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2086 2087 /* insert into matrix */ 2088 jj = rstart; 2089 smycols = mycols; 2090 svals = vals; 2091 for ( i=0; i<m; i++ ) { 2092 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2093 smycols += ourlens[i]; 2094 svals += ourlens[i]; 2095 jj++; 2096 } 2097 2098 /* read in other processors and ship out */ 2099 for ( i=1; i<size; i++ ) { 2100 nz = procsnz[i]; 2101 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2102 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2103 } 2104 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2105 } else { 2106 /* receive numeric values */ 2107 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 2108 2109 /* receive message of values*/ 2110 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2111 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2112 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2113 2114 /* insert into matrix */ 2115 jj = rstart; 2116 smycols = mycols; 2117 svals = vals; 2118 for ( i=0; i<m; i++ ) { 2119 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2120 smycols += ourlens[i]; 2121 svals += ourlens[i]; 2122 jj++; 2123 } 2124 } 2125 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2126 ierr = PetscFree(vals);CHKERRQ(ierr); 2127 ierr = PetscFree(mycols);CHKERRQ(ierr); 2128 ierr = PetscFree(rowners);CHKERRQ(ierr); 2129 2130 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2131 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2132 PetscFunctionReturn(0); 2133 } 2134 2135 #undef __FUNC__ 2136 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2137 /* 2138 Not great since it makes two copies of the submatrix, first an SeqAIJ 2139 in local and then by concatenating the local matrices the end result. 
2140 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2141 */ 2142 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2143 { 2144 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2145 Mat *local,M, Mreuse; 2146 Scalar *vwork,*aa; 2147 MPI_Comm comm = mat->comm; 2148 Mat_SeqAIJ *aij; 2149 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2150 2151 PetscFunctionBegin; 2152 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2153 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2154 2155 if (call == MAT_REUSE_MATRIX) { 2156 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2157 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2158 local = &Mreuse; 2159 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2160 } else { 2161 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2162 Mreuse = *local; 2163 ierr = PetscFree(local);CHKERRQ(ierr); 2164 } 2165 2166 /* 2167 m - number of local rows 2168 n - number of columns (same on all processors) 2169 rstart - first row in new global matrix generated 2170 */ 2171 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2172 if (call == MAT_INITIAL_MATRIX) { 2173 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2174 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2175 ii = aij->i; 2176 jj = aij->j; 2177 2178 /* 2179 Determine the number of non-zeros in the diagonal and off-diagonal 2180 portions of the matrix in order to do correct preallocation 2181 */ 2182 2183 /* first get start and end of "diagonal" columns */ 2184 if (csize == PETSC_DECIDE) { 2185 nlocal = n/size + ((n % size) > rank); 2186 } else { 2187 nlocal = csize; 2188 } 2189 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2190 rstart = rend - nlocal; 2191 if (rank == size - 1 && rend != n) { 2192 SETERRQ(1,1,"Local column sizes 
do not add up to total number of columns"); 2193 } 2194 2195 /* next, compute all the lengths */ 2196 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2197 olens = dlens + m; 2198 for ( i=0; i<m; i++ ) { 2199 jend = ii[i+1] - ii[i]; 2200 olen = 0; 2201 dlen = 0; 2202 for ( j=0; j<jend; j++ ) { 2203 if ( *jj < rstart || *jj >= rend) olen++; 2204 else dlen++; 2205 jj++; 2206 } 2207 olens[i] = olen; 2208 dlens[i] = dlen; 2209 } 2210 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2211 ierr = PetscFree(dlens);CHKERRQ(ierr); 2212 } else { 2213 int ml,nl; 2214 2215 M = *newmat; 2216 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2217 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2218 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2219 /* 2220 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2221 rather than the slower MatSetValues(). 2222 */ 2223 M->was_assembled = PETSC_TRUE; 2224 M->assembled = PETSC_FALSE; 2225 } 2226 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2227 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2228 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2229 ii = aij->i; 2230 jj = aij->j; 2231 aa = aij->a; 2232 for (i=0; i<m; i++) { 2233 row = rstart + i; 2234 nz = ii[i+1] - ii[i]; 2235 cwork = jj; jj += nz; 2236 vwork = aa; aa += nz; 2237 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2238 } 2239 2240 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2241 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2242 *newmat = M; 2243 2244 /* save submatrix used in processor for next request */ 2245 if (call == MAT_INITIAL_MATRIX) { 2246 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2247 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2248 } 2249 2250 PetscFunctionReturn(0); 
2251 } 2252