1 /*$Id: mpiaij.c,v 1.319 2000/08/30 20:58:56 bsmith Exp balay $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ /*<a name="CreateColmap_MPIAIJ_Private"></a>*/"CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(aij->n,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int*)PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for (_i=low; _i<high; _i++) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 78 new_j = (int*)(new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 83 for (ii=row+1; ii<a->m+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<b->m+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 Scalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 int roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 Scalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j; 212 Scalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 Scalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } 234 else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_BOPT_g) 236 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!aij->colmap) { 241 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined (PETSC_USE_CTABLE) 244 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 245 col--; 246 #else 247 col = aij->colmap[in[j]] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 250 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 253 B = aij->B; 254 b = (Mat_SeqAIJ*)B->data; 255 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 256 ba = b->a; 257 } 258 } else col = in[j]; 259 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 260 if (ignorezeroentries && value == 0.0) continue; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (!aij->donotstash) { 267 if (roworiented) { 268 if (ignorezeroentries && v[i*n] == 0.0) continue; 269 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 270 } else { 271 if (ignorezeroentries && v[i] == 0.0) continue; 272 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNC__ 281 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ" 282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 283 { 284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 285 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 286 int cstart = aij->cstart,cend = aij->cend,row,col; 287 288 PetscFunctionBegin; 289 for (i=0; i<m; i++) { 290 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 291 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 292 if (idxm[i] >= rstart && idxm[i] < rend) { 293 row = idxm[i] - rstart; 294 for (j=0; j<n; j++) { 295 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 296 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 297 if (idxn[j] >= cstart && idxn[j] < cend){ 298 col = idxn[j] - cstart; 299 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 300 } else { 301 if (!aij->colmap) { 302 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 303 } 304 #if defined (PETSC_USE_CTABLE) 305 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 306 col --; 307 #else 308 col = aij->colmap[idxn[j]] - 1; 309 #endif 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 313 } 314 } 315 } 316 } else { 317 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 318 } 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNC__ 324 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIAIJ" 325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 326 { 327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 328 int ierr,nstash,reallocs; 329 InsertMode addv; 330 331 PetscFunctionBegin; 332 if (aij->donotstash) { 333 PetscFunctionReturn(0); 334 } 335 336 /* make sure all processors are either in INSERTMODE or ADDMODE */ 337 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 338 if (addv == (ADD_VALUES|INSERT_VALUES)) { 339 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 340 } 341 mat->insertmode = addv; /* in case this processor had no cache */ 342 343 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 344 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 345 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 346 PetscFunctionReturn(0); 347 } 348 349 350 #undef __FUNC__ 351 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ" 352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 353 { 354 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 355 int i,j,rstart,ncols,n,ierr,flg; 356 int *row,*col,other_disassembled; 357 Scalar *val; 358 InsertMode addv = mat->insertmode; 359 360 PetscFunctionBegin; 361 if (!aij->donotstash) { 362 while (1) { 363 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 364 if (!flg) break; 365 366 for (i=0; i<n;) { 367 /* Now identify the consecutive vals belonging to the same row */ 368 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 369 if (j < n) ncols = j-i; 370 else ncols = n-i; 371 /* Now assemble all these values with a single function call */ 372 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 373 i = j; 374 } 375 } 376 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 377 } 378 379 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 380 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 381 382 /* determine if any processor has disassembled, if so we must 383 also disassemble ourselfs, in order that we may reassemble. */ 384 /* 385 if nonzero structure of submatrix B cannot change then we know that 386 no processor disassembled thus we can skip this stuff 387 */ 388 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 389 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 390 if (mat->was_assembled && !other_disassembled) { 391 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 } 394 395 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 396 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 397 } 398 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 399 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 400 401 if (aij->rowvalues) { 402 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 403 aij->rowvalues = 0; 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNC__ 409 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ" 410 int MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 int ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNC__ 422 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ" 423 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 427 int *procs,*nprocs,j,found,idx,nsends,*work,row; 428 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 429 int *rvalues,tag = A->tag,count,base,slen,n,*source; 430 int *lens,imdex,*lrows,*values,rstart=l->rstart; 431 MPI_Comm comm = A->comm; 432 MPI_Request *send_waits,*recv_waits; 433 MPI_Status recv_status,*send_status; 434 IS istmp; 435 436 PetscFunctionBegin; 437 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 438 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 439 440 /* first count number of contributors to each processor */ 441 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 442 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 443 procs = nprocs + size; 444 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 445 for (i=0; i<N; i++) { 446 idx = rows[i]; 447 found = 0; 448 for (j=0; j<size; j++) { 449 if (idx >= owners[j] && idx < owners[j+1]) { 450 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 451 } 452 } 453 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 454 } 455 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 456 457 /* inform other processors of number of messages and max length*/ 458 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 459 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 460 nrecvs = work[size+rank]; 461 nmax = work[rank]; 462 ierr = PetscFree(work);CHKERRQ(ierr); 463 464 /* post receives: */ 465 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 466 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 467 for (i=0; i<nrecvs; i++) { 468 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 469 } 470 471 /* do sends: 472 1) starts[i] gives the starting index in svalues for stuff going to 473 the ith processor 474 */ 475 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 476 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 477 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 478 starts[0] = 0; 479 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 480 for (i=0; i<N; i++) { 481 svalues[starts[owner[i]]++] = rows[i]; 482 } 483 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 484 485 starts[0] = 0; 486 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 487 count = 0; 488 for (i=0; i<size; i++) { 489 if (procs[i]) { 490 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 491 } 492 } 493 ierr = PetscFree(starts);CHKERRQ(ierr); 494 495 base = owners[rank]; 496 497 /* wait on receives */ 498 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 499 source = lens + nrecvs; 500 count = nrecvs; slen = 0; 501 while (count) { 502 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 503 /* unpack receives into our local space */ 504 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 505 source[imdex] = recv_status.MPI_SOURCE; 506 lens[imdex] = n; 507 slen += n; 508 count--; 509 } 510 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 511 512 /* move the data into the send scatter */ 513 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 514 count = 0; 515 for (i=0; i<nrecvs; i++) { 516 values = rvalues + i*nmax; 517 for (j=0; j<lens[i]; j++) { 518 lrows[count++] = values[j] - base; 519 } 520 } 521 ierr = PetscFree(rvalues);CHKERRQ(ierr); 522 ierr = PetscFree(lens);CHKERRQ(ierr); 523 ierr = PetscFree(owner);CHKERRQ(ierr); 524 ierr = PetscFree(nprocs);CHKERRQ(ierr); 525 526 /* actually zap the local rows */ 527 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 528 PLogObjectParent(A,istmp); 529 530 /* 531 Zero the required rows. If the "diagonal block" of the matrix 532 is square and the user wishes to set the diagonal we use seperate 533 code so that MatSetValues() is not called for each diagonal allocating 534 new memory, thus calling lots of mallocs and slowing things down. 535 536 Contributed by: Mathew Knepley 537 */ 538 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 539 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 540 if (diag && (l->A->M == l->A->N)) { 541 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 542 } else if (diag) { 543 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 544 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 545 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 547 } 548 for (i = 0; i < slen; i++) { 549 row = lrows[i] + rstart; 550 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 551 } 552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 } else { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 } 557 ierr = ISDestroy(istmp);CHKERRQ(ierr); 558 ierr = PetscFree(lrows);CHKERRQ(ierr); 559 560 /* wait on sends */ 561 if (nsends) { 562 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 563 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 564 ierr = PetscFree(send_status);CHKERRQ(ierr); 565 } 566 ierr = PetscFree(send_waits);CHKERRQ(ierr); 567 ierr = PetscFree(svalues);CHKERRQ(ierr); 568 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNC__ 573 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ" 574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 575 { 576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 577 int ierr,nt; 578 579 PetscFunctionBegin; 580 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 581 if (nt != a->n) { 582 SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt); 583 } 584 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 586 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNC__ 592 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ" 593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 594 { 595 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 596 int ierr; 597 598 PetscFunctionBegin; 599 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 601 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNC__ 607 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ" 608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 609 { 610 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 611 int ierr; 612 613 PetscFunctionBegin; 614 /* do nondiagonal part */ 615 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 616 /* send it on its way */ 617 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 618 /* do local part */ 619 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 620 /* receive remote parts: note this assumes the values are not actually */ 621 /* inserted in yy until the next line, which is true for my implementation*/ 622 /* but is not perhaps always true. */ 623 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 #undef __FUNC__ 628 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ" 629 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 630 { 631 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 632 int ierr; 633 634 PetscFunctionBegin; 635 /* do nondiagonal part */ 636 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 637 /* send it on its way */ 638 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 639 /* do local part */ 640 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 641 /* receive remote parts: note this assumes the values are not actually */ 642 /* inserted in yy until the next line, which is true for my implementation*/ 643 /* but is not perhaps always true. */ 644 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 645 PetscFunctionReturn(0); 646 } 647 648 /* 649 This only works correctly for square matrices where the subblock A->A is the 650 diagonal block 651 */ 652 #undef __FUNC__ 653 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ" 654 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 655 { 656 int ierr; 657 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 658 659 PetscFunctionBegin; 660 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 661 if (a->rstart != a->cstart || a->rend != a->cend) { 662 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 663 } 664 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ" 670 int MatScale_MPIAIJ(Scalar *aa,Mat A) 671 { 672 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 673 int ierr; 674 675 PetscFunctionBegin; 676 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 677 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNC__ 682 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ" 683 int MatDestroy_MPIAIJ(Mat mat) 684 { 685 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 686 int ierr; 687 688 PetscFunctionBegin; 689 690 if (mat->mapping) { 691 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 692 } 693 if (mat->bmapping) { 694 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 695 } 696 if (mat->rmap) { 697 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 698 } 699 if (mat->cmap) { 700 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 701 } 702 #if defined(PETSC_USE_LOG) 703 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 704 #endif 705 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 706 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 708 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 709 #if defined (PETSC_USE_CTABLE) 710 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 711 #else 712 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 713 #endif 714 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 715 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 716 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 717 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 718 ierr = PetscFree(aij);CHKERRQ(ierr); 719 PLogObjectDestroy(mat); 720 PetscHeaderDestroy(mat); 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNC__ 725 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket" 726 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 729 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 730 int ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size; 731 PetscTruth isdraw,isascii,flg; 732 Viewer sviewer; 733 734 PetscFunctionBegin; 735 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 736 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 737 if (isascii) { 738 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 739 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 740 MatInfo info; 741 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 742 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 743 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 744 if (flg) { 745 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 746 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 747 } else { 748 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 749 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 750 } 751 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 752 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 753 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 754 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 755 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 756 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 759 PetscFunctionReturn(0); 760 } 761 } else if (isdraw) { 762 Draw draw; 763 PetscTruth isnull; 764 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 765 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 766 } 767 768 if (size == 1) { 769 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 770 } else { 771 /* assemble the entire matrix onto first processor. */ 772 Mat A; 773 Mat_SeqAIJ *Aloc; 774 int M = aij->M,N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 775 Scalar *a; 776 777 if (!rank) { 778 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 779 } else { 780 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 781 } 782 PLogObjectParent(mat,A); 783 784 /* copy over the A part */ 785 Aloc = (Mat_SeqAIJ*)aij->A->data; 786 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 787 row = aij->rstart; 788 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 789 for (i=0; i<m; i++) { 790 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 791 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 792 } 793 aj = Aloc->j; 794 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 795 796 /* copy over the B part */ 797 Aloc = (Mat_SeqAIJ*)aij->B->data; 798 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 799 row = aij->rstart; 800 ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols); 801 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 802 for (i=0; i<m; i++) { 803 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 804 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 805 } 806 ierr = PetscFree(ct);CHKERRQ(ierr); 807 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 808 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 809 /* 810 Everyone has to call to draw the matrix since the graphics waits are 811 synchronized across all processors that share the Draw object 812 */ 813 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 814 if (!rank) { 815 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 816 } 817 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 818 ierr = MatDestroy(A);CHKERRQ(ierr); 819 } 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNC__ 824 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ" 825 int MatView_MPIAIJ(Mat mat,Viewer viewer) 826 { 827 int ierr; 828 PetscTruth isascii,isdraw,issocket,isbinary; 829 830 PetscFunctionBegin; 831 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 832 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 833 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 834 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 835 if (isascii || isdraw || isbinary || issocket) { 836 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 837 } else { 838 SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 839 } 840 PetscFunctionReturn(0); 841 } 842 843 /* 844 This has to provide several versions. 845 846 2) a) use only local smoothing updating outer values only once. 847 b) local smoothing updating outer values each inner iteration 848 3) color updating out values betwen colors. 849 */ 850 #undef __FUNC__ 851 #define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ" 852 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag, 853 PetscReal fshift,int its,Vec xx) 854 { 855 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 856 Mat AA = mat->A,BB = mat->B; 857 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 858 Scalar *b,*x,*xs,*ls,d,*v,sum; 859 int ierr,*idx,*diag; 860 int n = mat->n,m = mat->m,i,shift = A->indexshift; 861 862 PetscFunctionBegin; 863 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 864 diag = A->diag; 865 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 866 if (flag & SOR_ZERO_INITIAL_GUESS) { 867 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 871 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 872 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 873 if (xx != bb) { 874 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 875 } else { 876 b = x; 877 } 878 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 879 xs = x + shift; /* shift by one for index start of 1 */ 880 ls = ls + shift; 881 while (its--) { 882 /* go down through the rows */ 883 for (i=0; i<m; i++) { 884 n = A->i[i+1] - A->i[i]; 885 PLogFlops(4*n+3); 886 idx = A->j + A->i[i] + shift; 887 v = A->a + A->i[i] + shift; 888 sum = b[i]; 889 SPARSEDENSEMDOT(sum,xs,v,idx,n); 890 d = fshift + A->a[diag[i]+shift]; 891 n = B->i[i+1] - B->i[i]; 892 idx = B->j + B->i[i] + shift; 893 v = B->a + B->i[i] + shift; 894 SPARSEDENSEMDOT(sum,ls,v,idx,n); 895 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 896 } 897 /* come up through the rows */ 898 for (i=m-1; i>-1; i--) { 899 n = A->i[i+1] - A->i[i]; 900 PLogFlops(4*n+3); 901 idx = A->j + A->i[i] + shift; 902 v = A->a + A->i[i] + shift; 903 sum = b[i]; 904 SPARSEDENSEMDOT(sum,xs,v,idx,n); 905 d = fshift + A->a[diag[i]+shift]; 906 n = B->i[i+1] - B->i[i]; 907 idx = B->j + B->i[i] + shift; 908 v = B->a + B->i[i] + shift; 909 SPARSEDENSEMDOT(sum,ls,v,idx,n); 910 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 911 } 912 } 913 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 914 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 915 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 916 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 917 if (flag & SOR_ZERO_INITIAL_GUESS) { 918 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 922 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 923 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 924 if (xx != bb) { 925 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 926 } else { 927 b = x; 928 } 929 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 930 xs = x + shift; /* shift by one for index start of 1 */ 931 ls = ls + shift; 932 while (its--) { 933 for (i=0; i<m; i++) { 934 n = A->i[i+1] - A->i[i]; 935 PLogFlops(4*n+3); 936 idx = A->j + A->i[i] + shift; 937 v = A->a + A->i[i] + shift; 938 sum = b[i]; 939 SPARSEDENSEMDOT(sum,xs,v,idx,n); 940 d = fshift + A->a[diag[i]+shift]; 941 n = B->i[i+1] - B->i[i]; 942 idx = B->j + B->i[i] + shift; 943 v = B->a + B->i[i] + shift; 944 SPARSEDENSEMDOT(sum,ls,v,idx,n); 945 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 946 } 947 } 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 950 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 951 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 952 if (flag & SOR_ZERO_INITIAL_GUESS) { 953 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 957 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 958 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 959 if (xx != bb) { 960 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 961 } else { 962 b = x; 963 } 964 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 965 xs = x + shift; /* shift by one for index start of 1 */ 966 ls = ls + shift; 967 while (its--) { 968 for (i=m-1; i>-1; i--) { 969 n = A->i[i+1] - A->i[i]; 970 PLogFlops(4*n+3); 971 idx = A->j + A->i[i] + shift; 972 v = A->a + A->i[i] + shift; 973 sum = b[i]; 974 SPARSEDENSEMDOT(sum,xs,v,idx,n); 975 d = fshift + A->a[diag[i]+shift]; 976 n = B->i[i+1] - B->i[i]; 977 idx = B->j + B->i[i] + shift; 978 v = B->a + B->i[i] + shift; 979 SPARSEDENSEMDOT(sum,ls,v,idx,n); 980 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 981 } 982 } 983 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 984 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 985 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 986 } else { 987 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNC__ 993 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ" 994 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 995 { 996 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 997 Mat A = mat->A,B = mat->B; 998 int ierr; 999 PetscReal isend[5],irecv[5]; 1000 1001 PetscFunctionBegin; 1002 info->block_size = 1.0; 1003 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1004 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1005 isend[3] = info->memory; isend[4] = info->mallocs; 1006 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1007 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1008 isend[3] += info->memory; isend[4] += info->mallocs; 1009 if (flag == MAT_LOCAL) { 1010 info->nz_used = isend[0]; 1011 info->nz_allocated = isend[1]; 1012 info->nz_unneeded = isend[2]; 1013 info->memory = isend[3]; 1014 info->mallocs = isend[4]; 1015 } else if (flag == MAT_GLOBAL_MAX) { 1016 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1017 info->nz_used = irecv[0]; 1018 info->nz_allocated = irecv[1]; 1019 info->nz_unneeded = irecv[2]; 1020 info->memory = irecv[3]; 1021 info->mallocs = irecv[4]; 1022 } else if (flag == MAT_GLOBAL_SUM) { 1023 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1024 info->nz_used = irecv[0]; 1025 info->nz_allocated = irecv[1]; 1026 info->nz_unneeded = irecv[2]; 1027 info->memory = irecv[3]; 1028 info->mallocs = irecv[4]; 1029 } 1030 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1031 info->fill_ratio_needed = 0; 1032 info->factor_mallocs = 0; 1033 info->rows_global = (double)mat->M; 1034 info->columns_global = (double)mat->N; 1035 info->rows_local = (double)mat->m; 1036 info->columns_local = (double)mat->N; 1037 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #undef __FUNC__ 1042 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ" 1043 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1044 { 1045 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1046 int ierr; 1047 1048 PetscFunctionBegin; 1049 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1050 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1051 op == MAT_COLUMNS_UNSORTED || 1052 op == MAT_COLUMNS_SORTED || 1053 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1054 op == MAT_KEEP_ZEROED_ROWS || 1055 op == MAT_NEW_NONZERO_LOCATION_ERR || 1056 op == MAT_USE_INODES || 1057 op == MAT_DO_NOT_USE_INODES || 1058 op == MAT_IGNORE_ZERO_ENTRIES) { 1059 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1060 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1061 } else if (op == MAT_ROW_ORIENTED) { 1062 a->roworiented = PETSC_TRUE; 1063 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1064 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1065 } else if (op == MAT_ROWS_SORTED || 1066 op == MAT_ROWS_UNSORTED || 1067 op == MAT_SYMMETRIC || 1068 op == MAT_STRUCTURALLY_SYMMETRIC || 1069 op == MAT_YES_NEW_DIAGONALS) { 1070 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1071 } else if (op == MAT_COLUMN_ORIENTED) { 1072 a->roworiented = PETSC_FALSE; 1073 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1074 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1075 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1076 a->donotstash = PETSC_TRUE; 1077 } else if (op == MAT_NO_NEW_DIAGONALS){ 1078 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNC__ 1086 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIAIJ" 1087 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1088 { 1089 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1090 1091 PetscFunctionBegin; 1092 if (m) *m = mat->M; 1093 if (n) *n = mat->N; 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNC__ 1098 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIAIJ" 1099 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1100 { 1101 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1102 1103 PetscFunctionBegin; 1104 if (m) *m = mat->m; 1105 if (n) *n = mat->n; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNC__ 1110 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ" 1111 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1112 { 1113 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1114 1115 PetscFunctionBegin; 1116 if (m) *m = mat->rstart; 1117 if (n) *n = mat->rend; 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNC__ 1122 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ" 1123 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1124 { 1125 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1126 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1127 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1128 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1129 int *cmap,*idx_p; 1130 1131 PetscFunctionBegin; 1132 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1133 mat->getrowactive = PETSC_TRUE; 1134 1135 if (!mat->rowvalues && (idx || v)) { 1136 /* 1137 allocate enough space to hold information from the longest row. 1138 */ 1139 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1140 int max = 1,m = mat->m,tmp; 1141 for (i=0; i<m; i++) { 1142 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1143 if (max < tmp) { max = tmp; } 1144 } 1145 mat->rowvalues = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1146 mat->rowindices = (int*)(mat->rowvalues + max); 1147 } 1148 1149 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1150 lrow = row - rstart; 1151 1152 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1153 if (!v) {pvA = 0; pvB = 0;} 1154 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1155 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1156 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1157 nztot = nzA + nzB; 1158 1159 cmap = mat->garray; 1160 if (v || idx) { 1161 if (nztot) { 1162 /* Sort by increasing column numbers, assuming A and B already sorted */ 1163 int imark = -1; 1164 if (v) { 1165 *v = v_p = mat->rowvalues; 1166 for (i=0; i<nzB; i++) { 1167 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1168 else break; 1169 } 1170 imark = i; 1171 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1172 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1173 } 1174 if (idx) { 1175 *idx = idx_p = mat->rowindices; 1176 if (imark > -1) { 1177 for (i=0; i<imark; i++) { 1178 idx_p[i] = cmap[cworkB[i]]; 1179 } 1180 } else { 1181 for (i=0; i<nzB; i++) { 1182 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1183 else break; 1184 } 1185 imark = i; 1186 } 1187 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1188 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1189 } 1190 } else { 1191 if (idx) *idx = 0; 1192 if (v) *v = 0; 1193 } 1194 } 1195 *nz = nztot; 1196 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1197 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNC__ 1202 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ" 1203 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1204 { 1205 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1206 1207 PetscFunctionBegin; 1208 if (aij->getrowactive == PETSC_FALSE) { 1209 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1210 } 1211 aij->getrowactive = PETSC_FALSE; 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNC__ 1216 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ" 1217 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1218 { 1219 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1220 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1221 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1222 PetscReal sum = 0.0; 1223 Scalar *v; 1224 1225 PetscFunctionBegin; 1226 if (aij->size == 1) { 1227 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1228 } else { 1229 if (type == NORM_FROBENIUS) { 1230 v = amat->a; 1231 for (i=0; i<amat->nz; i++) { 1232 #if defined(PETSC_USE_COMPLEX) 1233 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1234 #else 1235 sum += (*v)*(*v); v++; 1236 #endif 1237 } 1238 v = bmat->a; 1239 for (i=0; i<bmat->nz; i++) { 1240 #if defined(PETSC_USE_COMPLEX) 1241 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1242 #else 1243 sum += (*v)*(*v); v++; 1244 #endif 1245 } 1246 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1247 *norm = sqrt(*norm); 1248 } else if (type == NORM_1) { /* max column norm */ 1249 PetscReal *tmp,*tmp2; 1250 int *jj,*garray = aij->garray; 1251 tmp = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp); 1252 tmp2 = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2); 1253 ierr = PetscMemzero(tmp,aij->N*sizeof(PetscReal));CHKERRQ(ierr); 1254 *norm = 0.0; 1255 v = amat->a; jj = amat->j; 1256 for (j=0; j<amat->nz; j++) { 1257 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1258 } 1259 v = bmat->a; jj = bmat->j; 1260 for (j=0; j<bmat->nz; j++) { 1261 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1262 } 1263 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1264 for (j=0; j<aij->N; j++) { 1265 if (tmp2[j] > *norm) *norm = tmp2[j]; 1266 } 1267 ierr = PetscFree(tmp);CHKERRQ(ierr); 1268 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1269 } else if (type == NORM_INFINITY) { /* max row norm */ 1270 PetscReal ntemp = 0.0; 1271 for (j=0; j<amat->m; j++) { 1272 v = amat->a + amat->i[j] + shift; 1273 sum = 0.0; 1274 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1275 sum += PetscAbsScalar(*v); v++; 1276 } 1277 v = bmat->a + bmat->i[j] + shift; 1278 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1279 sum += PetscAbsScalar(*v); v++; 1280 } 1281 if (sum > ntemp) ntemp = sum; 1282 } 1283 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1284 } else { 1285 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1286 } 1287 } 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNC__ 1292 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIAIJ" 1293 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1294 { 1295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1296 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1297 int ierr,shift = Aloc->indexshift; 1298 int M = a->M,N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1299 Mat B; 1300 Scalar *array; 1301 1302 PetscFunctionBegin; 1303 if (!matout && M != N) { 1304 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1305 } 1306 1307 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1308 1309 /* copy over the A part */ 1310 Aloc = (Mat_SeqAIJ*)a->A->data; 1311 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1312 row = a->rstart; 1313 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1314 for (i=0; i<m; i++) { 1315 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1316 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1317 } 1318 aj = Aloc->j; 1319 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1320 1321 /* copy over the B part */ 1322 Aloc = (Mat_SeqAIJ*)a->B->data; 1323 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1324 row = a->rstart; 1325 ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols); 1326 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1327 for (i=0; i<m; i++) { 1328 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1329 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1330 } 1331 ierr = PetscFree(ct);CHKERRQ(ierr); 1332 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1333 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1334 if (matout) { 1335 *matout = B; 1336 } else { 1337 PetscOps *Abops; 1338 struct _MatOps *Aops; 1339 1340 /* This isn't really an in-place transpose .... but free data structures from a */ 1341 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1342 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1343 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1344 #if defined (PETSC_USE_CTABLE) 1345 if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);} 1346 #else 1347 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1348 #endif 1349 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1350 if (a->lvec) {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);} 1351 if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);} 1352 ierr = PetscFree(a);CHKERRQ(ierr); 1353 1354 /* 1355 This is horrible, horrible code. We need to keep the 1356 A pointers for the bops and ops but copy everything 1357 else from C. 1358 */ 1359 Abops = A->bops; 1360 Aops = A->ops; 1361 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1362 A->bops = Abops; 1363 A->ops = Aops; 1364 PetscHeaderDestroy(B); 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNC__ 1370 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ" 1371 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1372 { 1373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1374 Mat a = aij->A,b = aij->B; 1375 int ierr,s1,s2,s3; 1376 1377 PetscFunctionBegin; 1378 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1379 if (rr) { 1380 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1381 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1382 /* Overlap communication with computation. */ 1383 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1384 } 1385 if (ll) { 1386 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1387 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1388 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1389 } 1390 /* scale the diagonal block */ 1391 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1392 1393 if (rr) { 1394 /* Do a scatter end and then right scale the off-diagonal block */ 1395 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1396 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1397 } 1398 1399 PetscFunctionReturn(0); 1400 } 1401 1402 1403 #undef __FUNC__ 1404 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ" 1405 int MatPrintHelp_MPIAIJ(Mat A) 1406 { 1407 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1408 int ierr; 1409 1410 PetscFunctionBegin; 1411 if (!a->rank) { 1412 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1413 } 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNC__ 1418 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ" 1419 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1420 { 1421 PetscFunctionBegin; 1422 *bs = 1; 1423 PetscFunctionReturn(0); 1424 } 1425 #undef __FUNC__ 1426 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ" 1427 int MatSetUnfactored_MPIAIJ(Mat A) 1428 { 1429 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1430 int ierr; 1431 1432 PetscFunctionBegin; 1433 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNC__ 1438 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ" 1439 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1440 { 1441 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1442 Mat a,b,c,d; 1443 PetscTruth flg; 1444 int ierr; 1445 1446 PetscFunctionBegin; 1447 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1448 a = matA->A; b = matA->B; 1449 c = matB->A; d = matB->B; 1450 1451 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1452 if (flg == PETSC_TRUE) { 1453 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1454 } 1455 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNC__ 1460 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ" 1461 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1462 { 1463 int ierr; 1464 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1465 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1466 1467 PetscFunctionBegin; 1468 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1469 /* because of the column compression in the off-processor part of the matrix a->B, 1470 the number of columns in a->B and b->B may be different, hence we cannot call 1471 the MatCopy() directly on the two parts. If need be, we can provide a more 1472 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1473 then copying the submatrices */ 1474 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1475 } else { 1476 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1477 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1483 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1484 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1485 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1486 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1487 1488 /* -------------------------------------------------------------------*/ 1489 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1490 MatGetRow_MPIAIJ, 1491 MatRestoreRow_MPIAIJ, 1492 MatMult_MPIAIJ, 1493 MatMultAdd_MPIAIJ, 1494 MatMultTranspose_MPIAIJ, 1495 MatMultTransposeAdd_MPIAIJ, 1496 0, 1497 0, 1498 0, 1499 0, 1500 0, 1501 0, 1502 MatRelax_MPIAIJ, 1503 MatTranspose_MPIAIJ, 1504 MatGetInfo_MPIAIJ, 1505 MatEqual_MPIAIJ, 1506 MatGetDiagonal_MPIAIJ, 1507 MatDiagonalScale_MPIAIJ, 1508 MatNorm_MPIAIJ, 1509 MatAssemblyBegin_MPIAIJ, 1510 MatAssemblyEnd_MPIAIJ, 1511 0, 1512 MatSetOption_MPIAIJ, 1513 MatZeroEntries_MPIAIJ, 1514 MatZeroRows_MPIAIJ, 1515 0, 1516 0, 1517 0, 1518 0, 1519 MatGetSize_MPIAIJ, 1520 MatGetLocalSize_MPIAIJ, 1521 MatGetOwnershipRange_MPIAIJ, 1522 0, 1523 0, 1524 0, 1525 0, 1526 MatDuplicate_MPIAIJ, 1527 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 MatGetSubMatrices_MPIAIJ, 1533 MatIncreaseOverlap_MPIAIJ, 1534 MatGetValues_MPIAIJ, 1535 MatCopy_MPIAIJ, 1536 MatPrintHelp_MPIAIJ, 1537 MatScale_MPIAIJ, 1538 0, 1539 0, 1540 0, 1541 MatGetBlockSize_MPIAIJ, 1542 0, 1543 0, 1544 0, 1545 0, 1546 MatFDColoringCreate_MPIAIJ, 1547 0, 1548 MatSetUnfactored_MPIAIJ, 1549 0, 1550 0, 1551 MatGetSubMatrix_MPIAIJ, 1552 MatDestroy_MPIAIJ, 1553 MatView_MPIAIJ, 1554 MatGetMaps_Petsc}; 1555 1556 /* ----------------------------------------------------------------------------------------*/ 1557 1558 EXTERN_C_BEGIN 1559 #undef __FUNC__ 1560 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ" 1561 int MatStoreValues_MPIAIJ(Mat mat) 1562 { 1563 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1564 int ierr; 1565 1566 PetscFunctionBegin; 1567 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1568 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1569 PetscFunctionReturn(0); 1570 } 1571 EXTERN_C_END 1572 1573 EXTERN_C_BEGIN 1574 #undef __FUNC__ 1575 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ" 1576 int MatRetrieveValues_MPIAIJ(Mat mat) 1577 { 1578 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1579 int ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1583 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 EXTERN_C_END 1587 1588 #include "petscpc.h" 1589 EXTERN_C_BEGIN 1590 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1591 EXTERN_C_END 1592 1593 #undef __FUNC__ 1594 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ" 1595 /*@C 1596 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1597 (the default parallel PETSc format). For good matrix assembly performance 1598 the user should preallocate the matrix storage by setting the parameters 1599 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1600 performance can be increased by more than a factor of 50. 1601 1602 Collective on MPI_Comm 1603 1604 Input Parameters: 1605 + comm - MPI communicator 1606 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1607 This value should be the same as the local size used in creating the 1608 y vector for the matrix-vector product y = Ax. 1609 . n - This value should be the same as the local size used in creating the 1610 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1611 calculated if N is given) For square matrices n is almost always m. 1612 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1613 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1614 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1615 (same value is used for all local rows) 1616 . d_nnz - array containing the number of nonzeros in the various rows of the 1617 DIAGONAL portion of the local submatrix (possibly different for each row) 1618 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1619 The size of this array is equal to the number of local rows, i.e 'm'. 1620 You must leave room for the diagonal entry even if it is zero. 1621 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1622 submatrix (same value is used for all local rows). 1623 - o_nnz - array containing the number of nonzeros in the various rows of the 1624 OFF-DIAGONAL portion of the local submatrix (possibly different for 1625 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1626 structure. The size of this array is equal to the number 1627 of local rows, i.e 'm'. 1628 1629 Output Parameter: 1630 . A - the matrix 1631 1632 Notes: 1633 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1634 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1635 storage requirements for this matrix. 1636 1637 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1638 processor than it must be used on all processors that share the object for 1639 that argument. 1640 1641 The AIJ format (also called the Yale sparse matrix format or 1642 compressed row storage), is fully compatible with standard Fortran 77 1643 storage. That is, the stored row and column indices can begin at 1644 either one (as in Fortran) or zero. See the users manual for details. 1645 1646 The user MUST specify either the local or global matrix dimensions 1647 (possibly both). 1648 1649 The parallel matrix is partitioned such that the first m0 rows belong to 1650 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1651 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1652 1653 The DIAGONAL portion of the local submatrix of a processor can be defined 1654 as the submatrix which is obtained by extraction the part corresponding 1655 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1656 first row that belongs to the processor, and r2 is the last row belonging 1657 to the this processor. This is a square mxm matrix. The remaining portion 1658 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1659 1660 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1661 1662 By default, this format uses inodes (identical nodes) when possible. 1663 We search for consecutive rows with the same nonzero structure, thereby 1664 reusing matrix information to achieve increased efficiency. 1665 1666 Options Database Keys: 1667 + -mat_aij_no_inode - Do not use inodes 1668 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1669 - -mat_aij_oneindex - Internally use indexing starting at 1 1670 rather than 0. Note that when calling MatSetValues(), 1671 the user still MUST index entries starting at 0! 1672 . -mat_mpi - use the parallel matrix data structures even on one processor 1673 (defaults to using SeqBAIJ format on one processor) 1674 . -mat_mpi - use the parallel matrix data structures even on one processor 1675 (defaults to using SeqAIJ format on one processor) 1676 1677 1678 Example usage: 1679 1680 Consider the following 8x8 matrix with 34 non-zero values, that is 1681 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1682 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1683 as follows: 1684 1685 .vb 1686 1 2 0 | 0 3 0 | 0 4 1687 Proc0 0 5 6 | 7 0 0 | 8 0 1688 9 0 10 | 11 0 0 | 12 0 1689 ------------------------------------- 1690 13 0 14 | 15 16 17 | 0 0 1691 Proc1 0 18 0 | 19 20 21 | 0 0 1692 0 0 0 | 22 23 0 | 24 0 1693 ------------------------------------- 1694 Proc2 25 26 27 | 0 0 28 | 29 0 1695 30 0 0 | 31 32 33 | 0 34 1696 .ve 1697 1698 This can be represented as a collection of submatrices as: 1699 1700 .vb 1701 A B C 1702 D E F 1703 G H I 1704 .ve 1705 1706 Where the submatrices A,B,C are owned by proc0, D,E,F are 1707 owned by proc1, G,H,I are owned by proc2. 1708 1709 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1710 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1711 The 'M','N' parameters are 8,8, and have the same values on all procs. 1712 1713 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1714 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1715 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1716 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1717 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1718 matrix, ans [DF] as another SeqAIJ matrix. 1719 1720 When d_nz, o_nz parameters are specified, d_nz storage elements are 1721 allocated for every row of the local diagonal submatrix, and o_nz 1722 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1723 One way to choose d_nz and o_nz is to use the max nonzerors per local 1724 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1725 In this case, the values of d_nz,o_nz are: 1726 .vb 1727 proc0 : dnz = 2, o_nz = 2 1728 proc1 : dnz = 3, o_nz = 2 1729 proc2 : dnz = 1, o_nz = 4 1730 .ve 1731 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1732 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1733 for proc3. i.e we are using 12+15+10=37 storage locations to store 1734 34 values. 1735 1736 When d_nnz, o_nnz parameters are specified, the storage is specified 1737 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1738 In the above case the values for d_nnz,o_nnz are: 1739 .vb 1740 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1741 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1742 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1743 .ve 1744 Here the space allocated is sum of all the above values i.e 34, and 1745 hence pre-allocation is perfect. 1746 1747 Level: intermediate 1748 1749 .keywords: matrix, aij, compressed row, sparse, parallel 1750 1751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1752 @*/ 1753 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1754 { 1755 Mat B; 1756 Mat_MPIAIJ *b; 1757 int ierr,i,size; 1758 PetscTruth flag1,flag2; 1759 1760 PetscFunctionBegin; 1761 1762 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 1763 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 1764 if (d_nnz) { 1765 for (i=0; i<m; i++) { 1766 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1767 } 1768 } 1769 if (o_nnz) { 1770 for (i=0; i<m; i++) { 1771 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1772 } 1773 } 1774 1775 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1776 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr); 1777 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1778 if (!flag1 && !flag2 && size == 1) { 1779 if (M == PETSC_DECIDE) M = m; 1780 if (N == PETSC_DECIDE) N = n; 1781 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 1785 *A = 0; 1786 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1787 PLogObjectCreate(B); 1788 B->data = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1789 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1790 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1791 B->factor = 0; 1792 B->assembled = PETSC_FALSE; 1793 B->mapping = 0; 1794 1795 B->insertmode = NOT_SET_VALUES; 1796 b->size = size; 1797 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1798 1799 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 1800 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1801 } 1802 1803 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1804 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1805 b->m = m; B->m = m; 1806 b->n = n; B->n = n; 1807 b->N = N; B->N = N; 1808 b->M = M; B->M = M; 1809 1810 /* the information in the maps duplicates the information computed below, eventually 1811 we should remove the duplicate information that is not contained in the maps */ 1812 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 1813 ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr); 1814 1815 /* build local table of row and column ownerships */ 1816 b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1817 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1818 b->cowners = b->rowners + b->size + 2; 1819 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1820 b->rowners[0] = 0; 1821 for (i=2; i<=b->size; i++) { 1822 b->rowners[i] += b->rowners[i-1]; 1823 } 1824 b->rstart = b->rowners[b->rank]; 1825 b->rend = b->rowners[b->rank+1]; 1826 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1827 b->cowners[0] = 0; 1828 for (i=2; i<=b->size; i++) { 1829 b->cowners[i] += b->cowners[i-1]; 1830 } 1831 b->cstart = b->cowners[b->rank]; 1832 b->cend = b->cowners[b->rank+1]; 1833 1834 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1835 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1836 PLogObjectParent(B,b->A); 1837 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1838 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1839 PLogObjectParent(B,b->B); 1840 1841 /* build cache for off array entries formed */ 1842 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1843 b->donotstash = PETSC_FALSE; 1844 b->colmap = 0; 1845 b->garray = 0; 1846 b->roworiented = PETSC_TRUE; 1847 1848 /* stuff used for matrix vector multiply */ 1849 b->lvec = PETSC_NULL; 1850 b->Mvctx = PETSC_NULL; 1851 1852 /* stuff for MatGetRow() */ 1853 b->rowindices = 0; 1854 b->rowvalues = 0; 1855 b->getrowactive = PETSC_FALSE; 1856 1857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1858 "MatStoreValues_MPIAIJ", 1859 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1861 "MatRetrieveValues_MPIAIJ", 1862 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1864 "MatGetDiagonalBlock_MPIAIJ", 1865 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1866 *A = B; 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNC__ 1871 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ" 1872 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1873 { 1874 Mat mat; 1875 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1876 int ierr,len = 0; 1877 PetscTruth flg; 1878 1879 PetscFunctionBegin; 1880 *newmat = 0; 1881 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1882 PLogObjectCreate(mat); 1883 mat->data = (void*)(a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a); 1884 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1885 mat->factor = matin->factor; 1886 mat->assembled = PETSC_TRUE; 1887 mat->insertmode = NOT_SET_VALUES; 1888 1889 a->m = mat->m = oldmat->m; 1890 a->n = mat->n = oldmat->n; 1891 a->M = mat->M = oldmat->M; 1892 a->N = mat->N = oldmat->N; 1893 1894 a->rstart = oldmat->rstart; 1895 a->rend = oldmat->rend; 1896 a->cstart = oldmat->cstart; 1897 a->cend = oldmat->cend; 1898 a->size = oldmat->size; 1899 a->rank = oldmat->rank; 1900 a->donotstash = oldmat->donotstash; 1901 a->roworiented = oldmat->roworiented; 1902 a->rowindices = 0; 1903 a->rowvalues = 0; 1904 a->getrowactive = PETSC_FALSE; 1905 1906 a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1907 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1908 a->cowners = a->rowners + a->size + 2; 1909 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1910 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1911 if (oldmat->colmap) { 1912 #if defined (PETSC_USE_CTABLE) 1913 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1914 #else 1915 a->colmap = (int*)PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1916 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1917 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1918 #endif 1919 } else a->colmap = 0; 1920 if (oldmat->garray) { 1921 len = ((Mat_SeqAIJ*)(oldmat->B->data))->n; 1922 a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1923 PLogObjectMemory(mat,len*sizeof(int)); 1924 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1925 } else a->garray = 0; 1926 1927 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1928 PLogObjectParent(mat,a->lvec); 1929 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1930 PLogObjectParent(mat,a->Mvctx); 1931 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1932 PLogObjectParent(mat,a->A); 1933 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1934 PLogObjectParent(mat,a->B); 1935 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1936 if (flg) { 1937 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1938 } 1939 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1940 *newmat = mat; 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #include "petscsys.h" 1945 1946 #undef __FUNC__ 1947 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ" 1948 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1949 { 1950 Mat A; 1951 Scalar *vals,*svals; 1952 MPI_Comm comm = ((PetscObject)viewer)->comm; 1953 MPI_Status status; 1954 int i,nz,ierr,j,rstart,rend,fd; 1955 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1956 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1957 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1958 1959 PetscFunctionBegin; 1960 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1961 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1962 if (!rank) { 1963 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1964 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1965 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1966 if (header[3] < 0) { 1967 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1968 } 1969 } 1970 1971 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1972 M = header[1]; N = header[2]; 1973 /* determine ownership of all rows */ 1974 m = M/size + ((M % size) > rank); 1975 rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1976 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1977 rowners[0] = 0; 1978 for (i=2; i<=size; i++) { 1979 rowners[i] += rowners[i-1]; 1980 } 1981 rstart = rowners[rank]; 1982 rend = rowners[rank+1]; 1983 1984 /* distribute row lengths to all processors */ 1985 ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens); 1986 offlens = ourlens + (rend-rstart); 1987 if (!rank) { 1988 rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths); 1989 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1990 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 1991 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1992 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1993 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1994 } else { 1995 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1996 } 1997 1998 if (!rank) { 1999 /* calculate the number of nonzeros on each processor */ 2000 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2001 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2002 for (i=0; i<size; i++) { 2003 for (j=rowners[i]; j< rowners[i+1]; j++) { 2004 procsnz[i] += rowlengths[j]; 2005 } 2006 } 2007 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2008 2009 /* determine max buffer needed and allocate it */ 2010 maxnz = 0; 2011 for (i=0; i<size; i++) { 2012 maxnz = PetscMax(maxnz,procsnz[i]); 2013 } 2014 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2015 2016 /* read in my part of the matrix column indices */ 2017 nz = procsnz[0]; 2018 mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols); 2019 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2020 2021 /* read in every one elses and ship off */ 2022 for (i=1; i<size; i++) { 2023 nz = procsnz[i]; 2024 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2025 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2026 } 2027 ierr = PetscFree(cols);CHKERRQ(ierr); 2028 } else { 2029 /* determine buffer space needed for message */ 2030 nz = 0; 2031 for (i=0; i<m; i++) { 2032 nz += ourlens[i]; 2033 } 2034 mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols); 2035 2036 /* receive message of column indices*/ 2037 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2038 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2039 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2040 } 2041 2042 /* determine column ownership if matrix is not square */ 2043 if (N != M) { 2044 n = N/size + ((N % size) > rank); 2045 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2046 cstart = cend - n; 2047 } else { 2048 cstart = rstart; 2049 cend = rend; 2050 n = cend - cstart; 2051 } 2052 2053 /* loop over local rows, determining number of off diagonal entries */ 2054 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2055 jj = 0; 2056 for (i=0; i<m; i++) { 2057 for (j=0; j<ourlens[i]; j++) { 2058 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2059 jj++; 2060 } 2061 } 2062 2063 /* create our matrix */ 2064 for (i=0; i<m; i++) { 2065 ourlens[i] -= offlens[i]; 2066 } 2067 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2068 A = *newmat; 2069 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2070 for (i=0; i<m; i++) { 2071 ourlens[i] += offlens[i]; 2072 } 2073 2074 if (!rank) { 2075 vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals); 2076 2077 /* read in my part of the matrix numerical values */ 2078 nz = procsnz[0]; 2079 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2080 2081 /* insert into matrix */ 2082 jj = rstart; 2083 smycols = mycols; 2084 svals = vals; 2085 for (i=0; i<m; i++) { 2086 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2087 smycols += ourlens[i]; 2088 svals += ourlens[i]; 2089 jj++; 2090 } 2091 2092 /* read in other processors and ship out */ 2093 for (i=1; i<size; i++) { 2094 nz = procsnz[i]; 2095 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2096 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2097 } 2098 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2099 } else { 2100 /* receive numeric values */ 2101 vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals); 2102 2103 /* receive message of values*/ 2104 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2105 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2106 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2107 2108 /* insert into matrix */ 2109 jj = rstart; 2110 smycols = mycols; 2111 svals = vals; 2112 for (i=0; i<m; i++) { 2113 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2114 smycols += ourlens[i]; 2115 svals += ourlens[i]; 2116 jj++; 2117 } 2118 } 2119 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2120 ierr = PetscFree(vals);CHKERRQ(ierr); 2121 ierr = PetscFree(mycols);CHKERRQ(ierr); 2122 ierr = PetscFree(rowners);CHKERRQ(ierr); 2123 2124 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2125 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2126 PetscFunctionReturn(0); 2127 } 2128 2129 #undef __FUNC__ 2130 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ" 2131 /* 2132 Not great since it makes two copies of the submatrix, first an SeqAIJ 2133 in local and then by concatenating the local matrices the end result. 2134 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2135 */ 2136 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2137 { 2138 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2139 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 2140 Mat *local,M,Mreuse; 2141 Scalar *vwork,*aa; 2142 MPI_Comm comm = mat->comm; 2143 Mat_SeqAIJ *aij; 2144 2145 2146 PetscFunctionBegin; 2147 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2148 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2149 2150 if (call == MAT_REUSE_MATRIX) { 2151 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2152 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2153 local = &Mreuse; 2154 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2155 } else { 2156 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2157 Mreuse = *local; 2158 ierr = PetscFree(local);CHKERRQ(ierr); 2159 } 2160 2161 /* 2162 m - number of local rows 2163 n - number of columns (same on all processors) 2164 rstart - first row in new global matrix generated 2165 */ 2166 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2167 if (call == MAT_INITIAL_MATRIX) { 2168 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2169 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2170 ii = aij->i; 2171 jj = aij->j; 2172 2173 /* 2174 Determine the number of non-zeros in the diagonal and off-diagonal 2175 portions of the matrix in order to do correct preallocation 2176 */ 2177 2178 /* first get start and end of "diagonal" columns */ 2179 if (csize == PETSC_DECIDE) { 2180 nlocal = n/size + ((n % size) > rank); 2181 } else { 2182 nlocal = csize; 2183 } 2184 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2185 rstart = rend - nlocal; 2186 if (rank == size - 1 && rend != n) { 2187 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2188 } 2189 2190 /* next, compute all the lengths */ 2191 dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens); 2192 olens = dlens + m; 2193 for (i=0; i<m; i++) { 2194 jend = ii[i+1] - ii[i]; 2195 olen = 0; 2196 dlen = 0; 2197 for (j=0; j<jend; j++) { 2198 if (*jj < rstart || *jj >= rend) olen++; 2199 else dlen++; 2200 jj++; 2201 } 2202 olens[i] = olen; 2203 dlens[i] = dlen; 2204 } 2205 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2206 ierr = PetscFree(dlens);CHKERRQ(ierr); 2207 } else { 2208 int ml,nl; 2209 2210 M = *newmat; 2211 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2212 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2213 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2214 /* 2215 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2216 rather than the slower MatSetValues(). 2217 */ 2218 M->was_assembled = PETSC_TRUE; 2219 M->assembled = PETSC_FALSE; 2220 } 2221 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2222 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2223 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2224 ii = aij->i; 2225 jj = aij->j; 2226 aa = aij->a; 2227 for (i=0; i<m; i++) { 2228 row = rstart + i; 2229 nz = ii[i+1] - ii[i]; 2230 cwork = jj; jj += nz; 2231 vwork = aa; aa += nz; 2232 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2233 } 2234 2235 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2236 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2237 *newmat = M; 2238 2239 /* save submatrix used in processor for next request */ 2240 if (call == MAT_INITIAL_MATRIX) { 2241 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2242 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2243 } 2244 2245 PetscFunctionReturn(0); 2246 } 2247