1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat); 14 EXTERN int MatUseSpooles_MPIAIJ(Mat); 15 16 /* 17 Local utility routine that creates a mapping from the global column 18 number to the local number in the off-diagonal part of the local 19 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 20 a slightly higher hash table cost; without it it is not scalable (each processor 21 has an order N integer array but is fast to acess. 22 */ 23 #undef __FUNCT__ 24 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 25 int CreateColmap_MPIAIJ_Private(Mat mat) 26 { 27 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 28 int n = aij->B->n,i,ierr; 29 30 PetscFunctionBegin; 31 #if defined (PETSC_USE_CTABLE) 32 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 33 for (i=0; i<n; i++){ 34 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 35 } 36 #else 37 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 38 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 39 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 40 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 41 #endif 42 PetscFunctionReturn(0); 43 } 44 45 #define CHUNKSIZE 15 46 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 47 { \ 48 \ 49 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 50 rmax = aimax[row]; nrow = ailen[row]; \ 51 col1 = col - shift; \ 52 \ 53 low = 0; high = nrow; \ 54 while (high-low > 5) { \ 55 t = (low+high)/2; \ 56 if 
(rp[t] > col) high = t; \ 57 else low = t; \ 58 } \ 59 for (_i=low; _i<high; _i++) { \ 60 if (rp[_i] > col1) break; \ 61 if (rp[_i] == col1) { \ 62 if (addv == ADD_VALUES) ap[_i] += value; \ 63 else ap[_i] = value; \ 64 goto a_noinsert; \ 65 } \ 66 } \ 67 if (nonew == 1) goto a_noinsert; \ 68 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 69 if (nrow >= rmax) { \ 70 /* there is no extra room in row, therefore enlarge */ \ 71 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 72 PetscScalar *new_a; \ 73 \ 74 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 75 \ 76 /* malloc new storage space */ \ 77 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 78 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 79 new_j = (int*)(new_a + new_nz); \ 80 new_i = new_j + new_nz; \ 81 \ 82 /* copy over old data into new slots */ \ 83 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 84 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 85 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 86 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 87 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 88 len*sizeof(int));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 91 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 92 /* free up old matrix storage */ \ 93 \ 94 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 95 if (!a->singlemalloc) { \ 96 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 97 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 98 } \ 99 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 100 a->singlemalloc = PETSC_TRUE; \ 101 \ 102 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 103 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 104 
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 105 a->maxnz += CHUNKSIZE; \ 106 a->reallocs++; \ 107 } \ 108 N = nrow++ - 1; a->nz++; \ 109 /* shift up all the later entries in this row */ \ 110 for (ii=N; ii>=_i; ii--) { \ 111 rp[ii+1] = rp[ii]; \ 112 ap[ii+1] = ap[ii]; \ 113 } \ 114 rp[_i] = col1; \ 115 ap[_i] = value; \ 116 a_noinsert: ; \ 117 ailen[row] = nrow; \ 118 } 119 120 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 121 { \ 122 \ 123 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 124 rmax = bimax[row]; nrow = bilen[row]; \ 125 col1 = col - shift; \ 126 \ 127 low = 0; high = nrow; \ 128 while (high-low > 5) { \ 129 t = (low+high)/2; \ 130 if (rp[t] > col) high = t; \ 131 else low = t; \ 132 } \ 133 for (_i=low; _i<high; _i++) { \ 134 if (rp[_i] > col1) break; \ 135 if (rp[_i] == col1) { \ 136 if (addv == ADD_VALUES) ap[_i] += value; \ 137 else ap[_i] = value; \ 138 goto b_noinsert; \ 139 } \ 140 } \ 141 if (nonew == 1) goto b_noinsert; \ 142 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 143 if (nrow >= rmax) { \ 144 /* there is no extra room in row, therefore enlarge */ \ 145 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 146 PetscScalar *new_a; \ 147 \ 148 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 149 \ 150 /* malloc new storage space */ \ 151 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 152 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 153 new_j = (int*)(new_a + new_nz); \ 154 new_i = new_j + new_nz; \ 155 \ 156 /* copy over old data into new slots */ \ 157 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 158 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 159 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 160 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 161 ierr = 
PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 162 len*sizeof(int));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 165 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 166 /* free up old matrix storage */ \ 167 \ 168 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 169 if (!b->singlemalloc) { \ 170 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 171 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 172 } \ 173 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 174 b->singlemalloc = PETSC_TRUE; \ 175 \ 176 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 177 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 178 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 179 b->maxnz += CHUNKSIZE; \ 180 b->reallocs++; \ 181 } \ 182 N = nrow++ - 1; b->nz++; \ 183 /* shift up all the later entries in this row */ \ 184 for (ii=N; ii>=_i; ii--) { \ 185 rp[ii+1] = rp[ii]; \ 186 ap[ii+1] = ap[ii]; \ 187 } \ 188 rp[_i] = col1; \ 189 ap[_i] = value; \ 190 b_noinsert: ; \ 191 bilen[row] = nrow; \ 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatSetValues_MPIAIJ" 196 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 197 { 198 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 199 PetscScalar value; 200 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 201 int cstart = aij->cstart,cend = aij->cend,row,col; 202 PetscTruth roworiented = aij->roworiented; 203 204 /* Some Variables required in the macro */ 205 Mat A = aij->A; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 207 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 208 PetscScalar *aa = a->a; 209 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 210 Mat B = aij->B; 211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 212 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = 
b->j,bm = aij->B->m,am = aij->A->m; 213 PetscScalar *ba = b->a; 214 215 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 216 int nonew = a->nonew,shift = a->indexshift; 217 PetscScalar *ap; 218 219 PetscFunctionBegin; 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_BOPT_g) 223 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 224 #endif 225 if (im[i] >= rstart && im[i] < rend) { 226 row = im[i] - rstart; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart && in[j] < cend){ 229 col = in[j] - cstart; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 if (ignorezeroentries && value == 0.0) continue; 232 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 233 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 234 } else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_BOPT_g) 236 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!aij->colmap) { 241 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined (PETSC_USE_CTABLE) 244 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 245 col--; 246 #else 247 col = aij->colmap[in[j]] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 250 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 253 B = aij->B; 254 b = (Mat_SeqAIJ*)B->data; 255 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 256 ba = b->a; 257 } 258 } else col = in[j]; 259 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 260 if (ignorezeroentries && value == 0.0) continue; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (!aij->donotstash) { 267 if (roworiented) { 268 if (ignorezeroentries && v[i*n] 
== 0.0) continue; 269 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 270 } else { 271 if (ignorezeroentries && v[i] == 0.0) continue; 272 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatGetValues_MPIAIJ" 282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 283 { 284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 285 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 286 int cstart = aij->cstart,cend = aij->cend,row,col; 287 288 PetscFunctionBegin; 289 for (i=0; i<m; i++) { 290 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 291 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 292 if (idxm[i] >= rstart && idxm[i] < rend) { 293 row = idxm[i] - rstart; 294 for (j=0; j<n; j++) { 295 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 296 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 297 if (idxn[j] >= cstart && idxn[j] < cend){ 298 col = idxn[j] - cstart; 299 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 300 } else { 301 if (!aij->colmap) { 302 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 303 } 304 #if defined (PETSC_USE_CTABLE) 305 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 306 col --; 307 #else 308 col = aij->colmap[idxn[j]] - 1; 309 #endif 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 313 } 314 } 315 } 316 } else { 317 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 318 } 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 326 { 327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 328 int 
ierr,nstash,reallocs; 329 InsertMode addv; 330 331 PetscFunctionBegin; 332 if (aij->donotstash) { 333 PetscFunctionReturn(0); 334 } 335 336 /* make sure all processors are either in INSERTMODE or ADDMODE */ 337 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 338 if (addv == (ADD_VALUES|INSERT_VALUES)) { 339 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 340 } 341 mat->insertmode = addv; /* in case this processor had no cache */ 342 343 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 344 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 345 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 346 PetscFunctionReturn(0); 347 } 348 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 353 { 354 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 355 int i,j,rstart,ncols,n,ierr,flg; 356 int *row,*col,other_disassembled; 357 PetscScalar *val; 358 InsertMode addv = mat->insertmode; 359 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 360 PetscTruth flag; 361 #endif 362 363 PetscFunctionBegin; 364 if (!aij->donotstash) { 365 while (1) { 366 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 367 if (!flg) break; 368 369 for (i=0; i<n;) { 370 /* Now identify the consecutive vals belonging to the same row */ 371 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 372 if (j < n) ncols = j-i; 373 else ncols = n-i; 374 /* Now assemble all these values with a single function call */ 375 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 376 i = j; 377 } 378 } 379 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 380 } 381 382 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 383 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 
384 385 /* determine if any processor has disassembled, if so we must 386 also disassemble ourselfs, in order that we may reassemble. */ 387 /* 388 if nonzero structure of submatrix B cannot change then we know that 389 no processor disassembled thus we can skip this stuff 390 */ 391 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 392 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 393 if (mat->was_assembled && !other_disassembled) { 394 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 } 397 398 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 399 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 400 } 401 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 402 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 403 404 if (aij->rowvalues) { 405 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 406 aij->rowvalues = 0; 407 } 408 #if defined(PETSC_HAVE_SUPERLUDIST) 409 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 410 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 411 #endif 412 413 #if defined(PETSC_HAVE_SPOOLES) 414 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 415 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 416 #endif 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 422 int MatZeroEntries_MPIAIJ(Mat A) 423 { 424 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 425 int ierr; 426 427 PetscFunctionBegin; 428 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 429 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatZeroRows_MPIAIJ" 435 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 436 { 437 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 438 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 439 int *procs,*nprocs,j,idx,nsends,*work,row; 440 int 
nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 441 int *rvalues,tag = A->tag,count,base,slen,n,*source; 442 int *lens,imdex,*lrows,*values,rstart=l->rstart; 443 MPI_Comm comm = A->comm; 444 MPI_Request *send_waits,*recv_waits; 445 MPI_Status recv_status,*send_status; 446 IS istmp; 447 PetscTruth found; 448 449 PetscFunctionBegin; 450 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 451 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 452 453 /* first count number of contributors to each processor */ 454 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 455 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 456 procs = nprocs + size; 457 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 458 for (i=0; i<N; i++) { 459 idx = rows[i]; 460 found = PETSC_FALSE; 461 for (j=0; j<size; j++) { 462 if (idx >= owners[j] && idx < owners[j+1]) { 463 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 464 } 465 } 466 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 467 } 468 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 469 470 /* inform other processors of number of messages and max length*/ 471 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 472 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 473 nrecvs = work[size+rank]; 474 nmax = work[rank]; 475 ierr = PetscFree(work);CHKERRQ(ierr); 476 477 /* post receives: */ 478 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 479 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 480 for (i=0; i<nrecvs; i++) { 481 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 482 } 483 484 /* do sends: 485 1) starts[i] gives the starting index in svalues for stuff going to 486 the ith processor 487 */ 488 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 489 ierr = 
PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 490 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 491 starts[0] = 0; 492 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 493 for (i=0; i<N; i++) { 494 svalues[starts[owner[i]]++] = rows[i]; 495 } 496 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 497 498 starts[0] = 0; 499 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 500 count = 0; 501 for (i=0; i<size; i++) { 502 if (procs[i]) { 503 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 504 } 505 } 506 ierr = PetscFree(starts);CHKERRQ(ierr); 507 508 base = owners[rank]; 509 510 /* wait on receives */ 511 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 512 source = lens + nrecvs; 513 count = nrecvs; slen = 0; 514 while (count) { 515 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 516 /* unpack receives into our local space */ 517 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 518 source[imdex] = recv_status.MPI_SOURCE; 519 lens[imdex] = n; 520 slen += n; 521 count--; 522 } 523 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 524 525 /* move the data into the send scatter */ 526 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 527 count = 0; 528 for (i=0; i<nrecvs; i++) { 529 values = rvalues + i*nmax; 530 for (j=0; j<lens[i]; j++) { 531 lrows[count++] = values[j] - base; 532 } 533 } 534 ierr = PetscFree(rvalues);CHKERRQ(ierr); 535 ierr = PetscFree(lens);CHKERRQ(ierr); 536 ierr = PetscFree(owner);CHKERRQ(ierr); 537 ierr = PetscFree(nprocs);CHKERRQ(ierr); 538 539 /* actually zap the local rows */ 540 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 541 PetscLogObjectParent(A,istmp); 542 543 /* 544 Zero the required rows. 
If the "diagonal block" of the matrix 545 is square and the user wishes to set the diagonal we use seperate 546 code so that MatSetValues() is not called for each diagonal allocating 547 new memory, thus calling lots of mallocs and slowing things down. 548 549 Contributed by: Mathew Knepley 550 */ 551 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 552 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 553 if (diag && (l->A->M == l->A->N)) { 554 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 555 } else if (diag) { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 558 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 559 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 560 } 561 for (i = 0; i < slen; i++) { 562 row = lrows[i] + rstart; 563 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 564 } 565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 567 } else { 568 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 569 } 570 ierr = ISDestroy(istmp);CHKERRQ(ierr); 571 ierr = PetscFree(lrows);CHKERRQ(ierr); 572 573 /* wait on sends */ 574 if (nsends) { 575 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 576 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 577 ierr = PetscFree(send_status);CHKERRQ(ierr); 578 } 579 ierr = PetscFree(send_waits);CHKERRQ(ierr); 580 ierr = PetscFree(svalues);CHKERRQ(ierr); 581 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatMult_MPIAIJ" 587 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 588 { 589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 590 int ierr,nt; 591 592 PetscFunctionBegin; 593 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 594 if (nt != A->n) { 595 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of 
A (%d) and xx (%d)",A->n,nt); 596 } 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultAdd_MPIAIJ" 606 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 int ierr; 610 611 PetscFunctionBegin; 612 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 613 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 614 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 615 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 621 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 622 { 623 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 624 int ierr; 625 626 PetscFunctionBegin; 627 /* do nondiagonal part */ 628 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 629 /* send it on its way */ 630 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 /* do local part */ 632 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 633 /* receive remote parts: note this assumes the values are not actually */ 634 /* inserted in yy until the next line, which is true for my implementation*/ 635 /* but is not perhaps always true. 
*/ 636 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 643 { 644 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 645 int ierr; 646 647 PetscFunctionBegin; 648 /* do nondiagonal part */ 649 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 650 /* send it on its way */ 651 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 /* do local part */ 653 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 654 /* receive remote parts: note this assumes the values are not actually */ 655 /* inserted in yy until the next line, which is true for my implementation*/ 656 /* but is not perhaps always true. */ 657 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 /* 662 This only works correctly for square matrices where the subblock A->A is the 663 diagonal block 664 */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 668 { 669 int ierr; 670 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 671 672 PetscFunctionBegin; 673 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 674 if (a->rstart != a->cstart || a->rend != a->cend) { 675 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 676 } 677 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatScale_MPIAIJ" 683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 684 { 685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 686 int ierr; 687 688 PetscFunctionBegin; 689 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 690 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define 
__FUNCT__ "MatDestroy_MPIAIJ" 696 int MatDestroy_MPIAIJ(Mat mat) 697 { 698 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 699 int ierr; 700 701 PetscFunctionBegin; 702 #if defined(PETSC_USE_LOG) 703 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 704 #endif 705 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 706 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 708 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 709 #if defined (PETSC_USE_CTABLE) 710 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 711 #else 712 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 713 #endif 714 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 715 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 716 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 717 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 718 ierr = PetscFree(aij);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 723 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 731 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 732 PetscTruth isdraw,isascii,flg; 733 PetscViewer sviewer; 734 PetscViewerFormat format; 735 736 PetscFunctionBegin; 737 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 738 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 739 if (isascii) { 740 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 741 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 742 MatInfo info; 743 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 744 ierr = 
MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 745 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 746 if (flg) { 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 748 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 749 } else { 750 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 751 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 752 } 753 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 754 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 755 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 756 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 758 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } else if (format == PETSC_VIEWER_ASCII_INFO) { 761 PetscFunctionReturn(0); 762 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 763 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 764 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 765 #endif 766 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 767 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 768 #endif 769 PetscFunctionReturn(0); 770 } 771 } else if (isdraw) { 772 PetscDraw draw; 773 PetscTruth isnull; 774 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 775 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 776 } 777 778 if (size == 1) { 779 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 780 ierr = 
MatView(aij->A,viewer);CHKERRQ(ierr); 781 } else { 782 /* assemble the entire matrix onto first processor. */ 783 Mat A; 784 Mat_SeqAIJ *Aloc; 785 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 786 PetscScalar *a; 787 788 if (!rank) { 789 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 790 } else { 791 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } 793 PetscLogObjectParent(mat,A); 794 795 /* copy over the A part */ 796 Aloc = (Mat_SeqAIJ*)aij->A->data; 797 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 798 row = aij->rstart; 799 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 800 for (i=0; i<m; i++) { 801 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 802 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 803 } 804 aj = Aloc->j; 805 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 806 807 /* copy over the B part */ 808 Aloc = (Mat_SeqAIJ*)aij->B->data; 809 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 810 row = aij->rstart; 811 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 812 ct = cols; 813 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 814 for (i=0; i<m; i++) { 815 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 816 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 817 } 818 ierr = PetscFree(ct);CHKERRQ(ierr); 819 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 820 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 /* 822 Everyone has to call to draw the matrix since the graphics waits are 823 synchronized across all processors that share the PetscDraw object 824 */ 825 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 826 if (!rank) { 827 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 828 ierr = 
MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/*
   MatView_MPIAIJ - Viewer dispatch for MPIAIJ matrices.  ASCII, draw,
   binary and socket viewers are all forwarded to the common handler
   MatView_MPIAIJ_ASCIIorDraworSocket(); any other viewer type is an error.
*/
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation for MPIAIJ.  Only the "local" sweep
   variants are supported: each outer iteration scatters the current
   solution xx into mat->lvec, forms bb1 = bb - B*x (B is the
   off-diagonal block), then runs the sequential relaxation of the
   diagonal block A against bb1.

   its  - number of global (outer) iterations (must be > 0)
   lits - local iteration count handed to the sequential relax (must be > 0)
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess B*x = 0, so the first sweep needs no scatter */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL is passed in the (int) local-its slot here,
         unlike the symmetric branch above which passes lits -- confirm intended */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - Collects matrix statistics by summing the MAT_LOCAL
   info of the diagonal (A) and off-diagonal (B) blocks; for MAT_GLOBAL_MAX
   and MAT_GLOBAL_SUM the five accumulated quantities are reduced across
   the matrix communicator.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  /* NOTE(review): columns_local is set to the GLOBAL column count N (the
     locally stored rows span all columns); confirm this is intended rather
     than matin->n */
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies an option to the parallel matrix, in most
   cases by forwarding it to both sequential blocks (see switch body).
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ
*a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that are simply forwarded to both the diagonal (A) and
     off-diagonal (B) sequential blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row (global numbering) by
   fetching the row from the diagonal (A) and off-diagonal (B) blocks and
   merging them into a single work buffer in increasing global column
   order.  Only one row may be "gotten" at a time; MatRestoreRow_MPIAIJ
   clears the active flag.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then column indices */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  /* note: SETERRQ expands to a return statement, so no semicolon is needed */
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column lies before the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* imark was not computed above (v == 0); find it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - Marks the row obtained by MatGetRow_MPIAIJ as
   released; the work buffers are reused by the next MatGetRow call. */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes NORM_FROBENIUS, NORM_1 (max column sum) or
   NORM_INFINITY (max row sum) of the parallel matrix, reducing partial
   results with MPI_Allreduce.  The two-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* one process: defer to the sequential implementation */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column, sum across ranks, take max */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps compressed off-diagonal columns back to global numbers */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local row
   (with its column indices temporarily converted to global numbering) as
   a column of a new parallel matrix B.  In-place transpose (matout == 0)
   is only permitted for square matrices.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* shift A's column indices to global numbering (undone below) */
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr =
MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1225 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1226 } 1227 aj = Aloc->j; 1228 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1229 1230 /* copy over the B part */ 1231 Aloc = (Mat_SeqAIJ*)a->B->data; 1232 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1233 row = a->rstart; 1234 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1235 ct = cols; 1236 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1237 for (i=0; i<m; i++) { 1238 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1239 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1240 } 1241 ierr = PetscFree(ct);CHKERRQ(ierr); 1242 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1243 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1244 if (matout) { 1245 *matout = B; 1246 } else { 1247 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1254 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1255 { 1256 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1257 Mat a = aij->A,b = aij->B; 1258 int ierr,s1,s2,s3; 1259 1260 PetscFunctionBegin; 1261 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1262 if (rr) { 1263 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1264 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1265 /* Overlap communication with computation. 
*/ 1266 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1267 } 1268 if (ll) { 1269 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1270 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1271 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1272 } 1273 /* scale the diagonal block */ 1274 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1275 1276 if (rr) { 1277 /* Do a scatter end and then right scale the off-diagonal block */ 1278 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1279 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1280 } 1281 1282 PetscFunctionReturn(0); 1283 } 1284 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1288 int MatPrintHelp_MPIAIJ(Mat A) 1289 { 1290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1291 int ierr; 1292 1293 PetscFunctionBegin; 1294 if (!a->rank) { 1295 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1296 } 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1302 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1303 { 1304 PetscFunctionBegin; 1305 *bs = 1; 1306 PetscFunctionReturn(0); 1307 } 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1310 int MatSetUnfactored_MPIAIJ(Mat A) 1311 { 1312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1313 int ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatEqual_MPIAIJ" 1322 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1323 { 1324 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1325 Mat a,b,c,d; 1326 PetscTruth flg; 1327 int ierr; 1328 1329 PetscFunctionBegin; 1330 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1331 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1332 a = 
matA->A; b = matA->B; 1333 c = matB->A; d = matB->B; 1334 1335 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1336 if (flg == PETSC_TRUE) { 1337 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1338 } 1339 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatCopy_MPIAIJ" 1345 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1346 { 1347 int ierr; 1348 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1349 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1350 PetscTruth flg; 1351 1352 PetscFunctionBegin; 1353 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1354 if (str != SAME_NONZERO_PATTERN || !flg) { 1355 /* because of the column compression in the off-processor part of the matrix a->B, 1356 the number of columns in a->B and b->B may be different, hence we cannot call 1357 the MatCopy() directly on the two parts. If need be, we can provide a more 1358 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1359 then copying the submatrices */ 1360 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1361 } else { 1362 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1363 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1370 int MatSetUpPreallocation_MPIAIJ(Mat A) 1371 { 1372 int ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1380 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1381 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1382 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1383 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1384 #if !defined(PETSC_USE_COMPLEX) && 
!defined(PETSC_USE_SINGLE) 1385 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1386 #endif 1387 1388 #include "petscblaslapack.h" 1389 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatAXPY_MPIAIJ" 1392 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1393 { 1394 int ierr,one; 1395 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1396 Mat_SeqAIJ *x,*y; 1397 1398 PetscFunctionBegin; 1399 if (str == SAME_NONZERO_PATTERN) { 1400 x = (Mat_SeqAIJ *)xx->A->data; 1401 y = (Mat_SeqAIJ *)yy->A->data; 1402 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1403 x = (Mat_SeqAIJ *)xx->B->data; 1404 y = (Mat_SeqAIJ *)yy->B->data; 1405 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1406 } else { 1407 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1408 } 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* -------------------------------------------------------------------*/ 1413 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1414 MatGetRow_MPIAIJ, 1415 MatRestoreRow_MPIAIJ, 1416 MatMult_MPIAIJ, 1417 MatMultAdd_MPIAIJ, 1418 MatMultTranspose_MPIAIJ, 1419 MatMultTransposeAdd_MPIAIJ, 1420 0, 1421 0, 1422 0, 1423 0, 1424 0, 1425 0, 1426 MatRelax_MPIAIJ, 1427 MatTranspose_MPIAIJ, 1428 MatGetInfo_MPIAIJ, 1429 MatEqual_MPIAIJ, 1430 MatGetDiagonal_MPIAIJ, 1431 MatDiagonalScale_MPIAIJ, 1432 MatNorm_MPIAIJ, 1433 MatAssemblyBegin_MPIAIJ, 1434 MatAssemblyEnd_MPIAIJ, 1435 0, 1436 MatSetOption_MPIAIJ, 1437 MatZeroEntries_MPIAIJ, 1438 MatZeroRows_MPIAIJ, 1439 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1440 MatLUFactorSymbolic_MPIAIJ_TFS, 1441 #else 1442 0, 1443 #endif 1444 0, 1445 0, 1446 0, 1447 MatSetUpPreallocation_MPIAIJ, 1448 0, 1449 0, 1450 0, 1451 0, 1452 MatDuplicate_MPIAIJ, 1453 0, 1454 0, 1455 0, 1456 0, 1457 MatAXPY_MPIAIJ, 1458 MatGetSubMatrices_MPIAIJ, 1459 MatIncreaseOverlap_MPIAIJ, 1460 MatGetValues_MPIAIJ, 1461 MatCopy_MPIAIJ, 1462 MatPrintHelp_MPIAIJ, 1463 MatScale_MPIAIJ, 1464 0, 1465 0, 1466 0, 1467 
MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves the numerical values of both blocks so
   they can later be restored with MatRetrieveValues(). */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the values saved by MatStoreValues(). */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Type constructor: allocates the Mat_MPIAIJ context,
   installs the function table, splits row/column ownership across the
   communicator, builds the ownership-range tables, creates the stash for
   off-process entries and registers the composed operations.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every rank's local row count, then prefix-sum into ranges */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same for the column ownership ranges */
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Deep-copies an assembled MPIAIJ matrix: ownership
   tables, column map, garray, scatter context and both sequential blocks
   (values copied or not according to cpvalues).
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners/cowners live in one allocation of length 2*(size+2) */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr =
PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format.  Rank 0
   reads the header, the row lengths, then the column indices and values;
   it keeps its own share and ships each other rank's share with MPI_Send,
   while the other ranks post matching MPI_Recv's.  Rows are distributed
   in (nearly) equal contiguous chunks; row lengths are scattered with
   MPI_Scatterv so every process can preallocate exactly.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix-sum the per-rank counts into ownership ranges */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; ourlens is temporarily reduced to diagonal-part lengths */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
   in local and then by concatenating the local matrices the end result.
   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix saved on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* MatGetSubMatrices() allocates the array of Mat; keep the single entry, free the array */
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m      - number of local rows
      n      - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts gives the end of this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; dlens and olens share one allocation */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    /* cwork/vwork point directly into Mreuse's CSR arrays; no copy is made */
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* composing took a reference; drop ours so M owns Mreuse */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices.  For eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.  i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  /* silently ignore matrices that are not of type MPIAIJ (allows generic callers) */
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  /* default per-row estimates when the user gave none */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  /* validate the optional per-row arrays before any allocation happens */
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* diagonal block is the local m x n square part; the off-diagonal block is
     created with the full global column count N (presumably compressed to the
     referenced columns later during assembly -- see garray/colmap handling) */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2217 2218 By default, this format uses inodes (identical nodes) when possible. 2219 We search for consecutive rows with the same nonzero structure, thereby 2220 reusing matrix information to achieve increased efficiency. 2221 2222 Options Database Keys: 2223 + -mat_aij_no_inode - Do not use inodes 2224 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2225 - -mat_aij_oneindex - Internally use indexing starting at 1 2226 rather than 0. Note that when calling MatSetValues(), 2227 the user still MUST index entries starting at 0! 2228 2229 2230 Example usage: 2231 2232 Consider the following 8x8 matrix with 34 non-zero values, that is 2233 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2234 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2235 as follows: 2236 2237 .vb 2238 1 2 0 | 0 3 0 | 0 4 2239 Proc0 0 5 6 | 7 0 0 | 8 0 2240 9 0 10 | 11 0 0 | 12 0 2241 ------------------------------------- 2242 13 0 14 | 15 16 17 | 0 0 2243 Proc1 0 18 0 | 19 20 21 | 0 0 2244 0 0 0 | 22 23 0 | 24 0 2245 ------------------------------------- 2246 Proc2 25 26 27 | 0 0 28 | 29 0 2247 30 0 0 | 31 32 33 | 0 34 2248 .ve 2249 2250 This can be represented as a collection of submatrices as: 2251 2252 .vb 2253 A B C 2254 D E F 2255 G H I 2256 .ve 2257 2258 Where the submatrices A,B,C are owned by proc0, D,E,F are 2259 owned by proc1, G,H,I are owned by proc2. 2260 2261 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2262 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2263 The 'M','N' parameters are 8,8, and have the same values on all procs. 2264 2265 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2266 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2267 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2268 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2269 part as SeqAIJ matrices. 
For eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.  i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
2298 2299 Level: intermediate 2300 2301 .keywords: matrix, aij, compressed row, sparse, parallel 2302 2303 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2304 @*/ 2305 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2306 { 2307 int ierr,size; 2308 2309 PetscFunctionBegin; 2310 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2311 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2312 if (size > 1) { 2313 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2314 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2315 } else { 2316 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2317 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2318 } 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2324 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2325 { 2326 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2327 PetscFunctionBegin; 2328 *Ad = a->A; 2329 *Ao = a->B; 2330 *colmap = a->garray; 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2336 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2337 { 2338 int ierr; 2339 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2340 2341 PetscFunctionBegin; 2342 if (coloring->ctype == IS_COLORING_LOCAL) { 2343 int *allcolors,*colors,i; 2344 ISColoring ocoloring; 2345 2346 /* set coloring for diagonal portion */ 2347 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2348 2349 /* set coloring for off-diagonal portion */ 2350 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2351 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2352 for (i=0; i<a->B->n; i++) { 2353 colors[i] = allcolors[a->garray[i]]; 2354 } 2355 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2356 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2357 ierr = 
MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2358 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2359 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2360 int *colors,i,*larray; 2361 ISColoring ocoloring; 2362 2363 /* set coloring for diagonal portion */ 2364 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2365 for (i=0; i<a->A->n; i++) { 2366 larray[i] = i + a->cstart; 2367 } 2368 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2369 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2370 for (i=0; i<a->A->n; i++) { 2371 colors[i] = coloring->colors[larray[i]]; 2372 } 2373 ierr = PetscFree(larray);CHKERRQ(ierr); 2374 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2375 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2376 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2377 2378 /* set coloring for off-diagonal portion */ 2379 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2380 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2381 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2382 for (i=0; i<a->B->n; i++) { 2383 colors[i] = coloring->colors[larray[i]]; 2384 } 2385 ierr = PetscFree(larray);CHKERRQ(ierr); 2386 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2387 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2388 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2389 } else { 2390 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2391 } 2392 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2398 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2399 { 2400 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2401 int ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = 
MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2405 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2411 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2412 { 2413 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2414 int ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2418 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421