/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;    /* n = number of off-diagonal (B) columns on this process */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table: maps garray[i]+1 -> i+1 (the +1 offsets avoid storing 0, which
     the table treats as "not found") */
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length N (global columns); entry 0 means "not present",
     otherwise colmap[g] = local index + 1 */
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15    /* number of extra nonzero slots added to a row on reallocation */

/*
   Inserts (or adds) one value into the "diagonal" block aij->A.
   This macro expects the following to be in scope in the caller
   (see MatSetValues_MPIAIJ): rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N,
   ierr, plus the A-block aliases a,aa,ai,aj,aimax,ailen,am and
   nonew,shift. Uses goto a_noinsert to skip insertion.

   NOTE(review): the binary search narrows with `rp[t] > col` while the
   linear scan compares against `col1 = col - shift`; these differ when
   shift (indexshift) is nonzero -- confirm intended for 1-based indexing.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    /* binary search to bracket the insertion point, then linear scan */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one allocation holds values, column indices
         and row pointers (singlemalloc layout) */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows after `row` shift by CHUNKSIZE */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/*
   Same as MatSetValues_SeqAIJ_A_Private but operates on the
   "off-diagonal" block aij->B (aliases b,ba,bi,bj,bimax,bilen,bm).
   NOTE(review): also inherits `nonew` and `shift` from the A block in
   MatSetValues_MPIAIJ -- confirm the two blocks always share these settings.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    /* binary search to bracket the insertion point, then linear scan */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: single allocation (singlemalloc layout) */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows after `row` shift by CHUNKSIZE */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

/*
   Insert/add an m-by-n logically dense block of values (global indices
   im[]/in[]) into the parallel matrix. Locally owned rows go directly into
   the A (diagonal) or B (off-diagonal) sequential blocks via the macros
   above; rows owned by other processes are stashed for communication
   during assembly. Negative row/column indices are silently skipped.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  /* only drop explicit zeros when adding; inserting a zero must overwrite */
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  /* scratch used inside the insertion macros */
  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is locally owned */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column: revert B to global numbering so it can grow */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly communication */
      if (!aij->donotstash) {
        if (roworiented) {
          /* NOTE(review): only v[i*n] (resp. v[i]) is tested before skipping the
             whole row -- confirm rows with a zero first entry but nonzero later
             entries are meant to be stashed anyway (they are not skipped here
             unless the first entry is zero) */
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Retrieve values at global indices (idxm[],idxn[]) into v (row-major).
   Only locally owned rows are supported; entries absent from the local
   nonzero structure are returned as 0.0.
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column to local B column */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not in local nonzero structure -> value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
   Begin assembly: verify all processes agree on INSERT vs ADD mode and
   start scattering stashed off-process values to their owners.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

/*
   End assembly: drain the stash (inserting received values row by row),
   assemble the A and B sequential blocks, coordinate disassembly across
   processes if any process disassembled, and set up the parallel
   multiply scatter on first final assembly.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* drop the MatGetRow() work array; it is rebuilt on demand */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

/* Zero all entries of both the diagonal (A) and off-diagonal (B) blocks. */
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Zero the (globally numbered) rows listed in `is`, optionally setting the
   diagonal entry to *diag. Rows may be specified on any process; each row
   index is routed to its owning process with MPI_Irecv/MPI_Isend before
   the local rows are zeroed.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;    /* procs[j] = 1 iff we send anything to process j */
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* linear search over the ownership ranges to find the owner of idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];   /* number of messages this process will receive */
  nmax   = work[rank];        /* max length of any message received */
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (the loop above advanced it) before issuing the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];   /* first globally numbered row owned by this process */

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row numbering */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: must place the diagonal via MatSetValues */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
   yy = A*xx. Overlaps the scatter of off-process entries of xx into lvec
   with the local multiply by the diagonal block, then adds B*lvec.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A*xx + yy, using the same scatter/overlap pattern as MatMult_MPIAIJ. */
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A'*xx: compute B'*xx locally, reverse-scatter (adding) into yy,
   overlapped with the local transpose multiply by the diagonal block. */
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A'*xx + yy, following the same pattern as MatMultTranspose_MPIAIJ. */
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scale the whole matrix by *aa (applied to both sequential blocks). */
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release everything owned by the parallel matrix: stash, sub-matrices,
   column map, garray, work vector, scatter context and the data struct. */
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);

/*
   Viewer back-end shared by ASCII, draw, binary and socket viewers.
   For info formats it prints per-process statistics; otherwise (size>1)
   it gathers the entire matrix onto process 0 and views it there.
*/
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif

      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    /* process 0 owns the whole gathered matrix; all others own 0 rows */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily convert the A-block column indices to global numbering */
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* restore the local numbering */
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;   /* keep the base pointer; cols is advanced below */
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch MatView to the shared back-end for the supported viewer types. */
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



/*
   Block Jacobi-style SOR/relaxation: each outer iteration scatters the
   current solution, forms bb1 = bb - B*x (off-process coupling moved to
   the right-hand side) and runs the requested local sweep on the
   diagonal block. Only the SOR_LOCAL_* variants are supported.
*/
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-process contribution is zero: sweep directly */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Combine the statistics of the A and B blocks and, for the GLOBAL
   flavors, reduce them (max or sum) across the communicator.
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  /* NOTE(review): columns_local is set to the GLOBAL column count N, not the
     local n -- presumably because a row-distributed matrix spans all columns
     locally; confirm against the MatInfo documentation */
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ
*a = (Mat_MPIAIJ*)A->data; 985 int ierr; 986 987 PetscFunctionBegin; 988 switch (op) { 989 case MAT_NO_NEW_NONZERO_LOCATIONS: 990 case MAT_YES_NEW_NONZERO_LOCATIONS: 991 case MAT_COLUMNS_UNSORTED: 992 case MAT_COLUMNS_SORTED: 993 case MAT_NEW_NONZERO_ALLOCATION_ERR: 994 case MAT_KEEP_ZEROED_ROWS: 995 case MAT_NEW_NONZERO_LOCATION_ERR: 996 case MAT_USE_INODES: 997 case MAT_DO_NOT_USE_INODES: 998 case MAT_IGNORE_ZERO_ENTRIES: 999 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1000 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1001 break; 1002 case MAT_ROW_ORIENTED: 1003 a->roworiented = PETSC_TRUE; 1004 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1005 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1006 break; 1007 case MAT_ROWS_SORTED: 1008 case MAT_ROWS_UNSORTED: 1009 case MAT_YES_NEW_DIAGONALS: 1010 case MAT_USE_SINGLE_PRECISION_SOLVES: 1011 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1012 break; 1013 case MAT_COLUMN_ORIENTED: 1014 a->roworiented = PETSC_FALSE; 1015 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1016 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1017 break; 1018 case MAT_IGNORE_OFF_PROC_ENTRIES: 1019 a->donotstash = PETSC_TRUE; 1020 break; 1021 case MAT_NO_NEW_DIAGONALS: 1022 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1023 default: 1024 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1025 } 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatGetRow_MPIAIJ" 1031 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1032 { 1033 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1034 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1035 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1036 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1037 int *cmap,*idx_p; 1038 1039 PetscFunctionBegin; 1040 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1041 mat->getrowactive = PETSC_TRUE; 1042 1043 if (!mat->rowvalues && (idx || v)) { 1044 /* 1045 
allocate enough space to hold information from the longest row. 1046 */ 1047 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1048 int max = 1,tmp; 1049 for (i=0; i<matin->m; i++) { 1050 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1051 if (max < tmp) { max = tmp; } 1052 } 1053 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1054 mat->rowindices = (int*)(mat->rowvalues + max); 1055 } 1056 1057 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1058 lrow = row - rstart; 1059 1060 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1061 if (!v) {pvA = 0; pvB = 0;} 1062 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1063 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1064 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1065 nztot = nzA + nzB; 1066 1067 cmap = mat->garray; 1068 if (v || idx) { 1069 if (nztot) { 1070 /* Sort by increasing column numbers, assuming A and B already sorted */ 1071 int imark = -1; 1072 if (v) { 1073 *v = v_p = mat->rowvalues; 1074 for (i=0; i<nzB; i++) { 1075 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1076 else break; 1077 } 1078 imark = i; 1079 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1080 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1081 } 1082 if (idx) { 1083 *idx = idx_p = mat->rowindices; 1084 if (imark > -1) { 1085 for (i=0; i<imark; i++) { 1086 idx_p[i] = cmap[cworkB[i]]; 1087 } 1088 } else { 1089 for (i=0; i<nzB; i++) { 1090 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1091 else break; 1092 } 1093 imark = i; 1094 } 1095 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1096 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1097 } 1098 } else { 1099 if (idx) *idx = 0; 1100 if (v) *v = 0; 1101 } 1102 } 1103 *nz = nztot; 1104 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1105 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1111 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1112 { 1113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1114 1115 PetscFunctionBegin; 1116 if (aij->getrowactive == PETSC_FALSE) { 1117 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1118 } 1119 aij->getrowactive = PETSC_FALSE; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatNorm_MPIAIJ" 1125 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1126 { 1127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1128 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1129 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1130 PetscReal sum = 0.0; 1131 PetscScalar *v; 1132 1133 PetscFunctionBegin; 1134 if (aij->size == 1) { 1135 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1136 } else { 1137 if (type == NORM_FROBENIUS) { 1138 v = amat->a; 1139 for (i=0; i<amat->nz; i++) { 1140 #if defined(PETSC_USE_COMPLEX) 1141 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1142 #else 1143 sum += (*v)*(*v); v++; 1144 #endif 1145 } 1146 v = bmat->a; 1147 for (i=0; i<bmat->nz; i++) { 1148 #if defined(PETSC_USE_COMPLEX) 1149 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1150 #else 1151 sum += (*v)*(*v); v++; 1152 #endif 1153 } 1154 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1155 *norm = sqrt(*norm); 1156 } else if (type == NORM_1) { /* max column norm */ 1157 PetscReal *tmp,*tmp2; 1158 int *jj,*garray = aij->garray; 1159 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1160 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1161 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1162 *norm = 0.0; 1163 v = amat->a; jj = amat->j; 1164 for (j=0; j<amat->nz; j++) { 
1165 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1166 } 1167 v = bmat->a; jj = bmat->j; 1168 for (j=0; j<bmat->nz; j++) { 1169 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1170 } 1171 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1172 for (j=0; j<mat->N; j++) { 1173 if (tmp2[j] > *norm) *norm = tmp2[j]; 1174 } 1175 ierr = PetscFree(tmp);CHKERRQ(ierr); 1176 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1177 } else if (type == NORM_INFINITY) { /* max row norm */ 1178 PetscReal ntemp = 0.0; 1179 for (j=0; j<aij->A->m; j++) { 1180 v = amat->a + amat->i[j] + shift; 1181 sum = 0.0; 1182 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1183 sum += PetscAbsScalar(*v); v++; 1184 } 1185 v = bmat->a + bmat->i[j] + shift; 1186 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1187 sum += PetscAbsScalar(*v); v++; 1188 } 1189 if (sum > ntemp) ntemp = sum; 1190 } 1191 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1192 } else { 1193 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1194 } 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatTranspose_MPIAIJ" 1201 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1205 int ierr,shift = Aloc->indexshift; 1206 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1207 Mat B; 1208 PetscScalar *array; 1209 1210 PetscFunctionBegin; 1211 if (!matout && M != N) { 1212 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1213 } 1214 1215 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1216 1217 /* copy over the A part */ 1218 Aloc = (Mat_SeqAIJ*)a->A->data; 1219 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1220 row = a->rstart; 1221 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1222 for (i=0; i<m; i++) { 1223 ierr = 
MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1224 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1225 } 1226 aj = Aloc->j; 1227 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1228 1229 /* copy over the B part */ 1230 Aloc = (Mat_SeqAIJ*)a->B->data; 1231 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1232 row = a->rstart; 1233 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1234 ct = cols; 1235 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1236 for (i=0; i<m; i++) { 1237 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1238 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1239 } 1240 ierr = PetscFree(ct);CHKERRQ(ierr); 1241 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1242 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1243 if (matout) { 1244 *matout = B; 1245 } else { 1246 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1253 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1256 Mat a = aij->A,b = aij->B; 1257 int ierr,s1,s2,s3; 1258 1259 PetscFunctionBegin; 1260 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1261 if (rr) { 1262 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1263 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1264 /* Overlap communication with computation. 
*/ 1265 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1266 } 1267 if (ll) { 1268 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1269 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1270 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1271 } 1272 /* scale the diagonal block */ 1273 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1274 1275 if (rr) { 1276 /* Do a scatter end and then right scale the off-diagonal block */ 1277 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1278 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1279 } 1280 1281 PetscFunctionReturn(0); 1282 } 1283 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1287 int MatPrintHelp_MPIAIJ(Mat A) 1288 { 1289 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1290 int ierr; 1291 1292 PetscFunctionBegin; 1293 if (!a->rank) { 1294 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1301 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1302 { 1303 PetscFunctionBegin; 1304 *bs = 1; 1305 PetscFunctionReturn(0); 1306 } 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1309 int MatSetUnfactored_MPIAIJ(Mat A) 1310 { 1311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1312 int ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatEqual_MPIAIJ" 1321 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1322 { 1323 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1324 Mat a,b,c,d; 1325 PetscTruth flg; 1326 int ierr; 1327 1328 PetscFunctionBegin; 1329 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1330 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1331 a = 
matA->A; b = matA->B; 1332 c = matB->A; d = matB->B; 1333 1334 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1335 if (flg == PETSC_TRUE) { 1336 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1337 } 1338 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCopy_MPIAIJ" 1344 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1345 { 1346 int ierr; 1347 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1348 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1349 PetscTruth flg; 1350 1351 PetscFunctionBegin; 1352 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1353 if (str != SAME_NONZERO_PATTERN || !flg) { 1354 /* because of the column compression in the off-processor part of the matrix a->B, 1355 the number of columns in a->B and b->B may be different, hence we cannot call 1356 the MatCopy() directly on the two parts. If need be, we can provide a more 1357 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1358 then copying the submatrices */ 1359 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1360 } else { 1361 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1362 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1369 int MatSetUpPreallocation_MPIAIJ(Mat A) 1370 { 1371 int ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1379 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1380 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1381 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1382 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1383 #if !defined(PETSC_USE_COMPLEX) && 
!defined(PETSC_USE_SINGLE) 1384 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1385 #endif 1386 1387 #include "petscblaslapack.h" 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatAXPY_MPIAIJ" 1391 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1392 { 1393 int ierr,one; 1394 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1395 Mat_SeqAIJ *x,*y; 1396 1397 PetscFunctionBegin; 1398 if (str == SAME_NONZERO_PATTERN) { 1399 x = (Mat_SeqAIJ *)xx->A->data; 1400 y = (Mat_SeqAIJ *)yy->A->data; 1401 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1402 x = (Mat_SeqAIJ *)xx->B->data; 1403 y = (Mat_SeqAIJ *)yy->B->data; 1404 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1405 } else { 1406 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 /* -------------------------------------------------------------------*/ 1412 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1413 MatGetRow_MPIAIJ, 1414 MatRestoreRow_MPIAIJ, 1415 MatMult_MPIAIJ, 1416 MatMultAdd_MPIAIJ, 1417 MatMultTranspose_MPIAIJ, 1418 MatMultTransposeAdd_MPIAIJ, 1419 0, 1420 0, 1421 0, 1422 0, 1423 0, 1424 0, 1425 MatRelax_MPIAIJ, 1426 MatTranspose_MPIAIJ, 1427 MatGetInfo_MPIAIJ, 1428 MatEqual_MPIAIJ, 1429 MatGetDiagonal_MPIAIJ, 1430 MatDiagonalScale_MPIAIJ, 1431 MatNorm_MPIAIJ, 1432 MatAssemblyBegin_MPIAIJ, 1433 MatAssemblyEnd_MPIAIJ, 1434 0, 1435 MatSetOption_MPIAIJ, 1436 MatZeroEntries_MPIAIJ, 1437 MatZeroRows_MPIAIJ, 1438 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1439 MatLUFactorSymbolic_MPIAIJ_TFS, 1440 #else 1441 0, 1442 #endif 1443 0, 1444 0, 1445 0, 1446 MatSetUpPreallocation_MPIAIJ, 1447 0, 1448 0, 1449 0, 1450 0, 1451 MatDuplicate_MPIAIJ, 1452 0, 1453 0, 1454 0, 1455 0, 1456 MatAXPY_MPIAIJ, 1457 MatGetSubMatrices_MPIAIJ, 1458 MatIncreaseOverlap_MPIAIJ, 1459 MatGetValues_MPIAIJ, 1460 MatCopy_MPIAIJ, 1461 MatPrintHelp_MPIAIJ, 1462 MatScale_MPIAIJ, 1463 0, 1464 0, 1465 0, 1466 
MatGetBlockSize_MPIAIJ, 1467 0, 1468 0, 1469 0, 1470 0, 1471 MatFDColoringCreate_MPIAIJ, 1472 0, 1473 MatSetUnfactored_MPIAIJ, 1474 0, 1475 0, 1476 MatGetSubMatrix_MPIAIJ, 1477 MatDestroy_MPIAIJ, 1478 MatView_MPIAIJ, 1479 MatGetPetscMaps_Petsc, 1480 0, 1481 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 0, 1487 0, 1488 MatSetColoring_MPIAIJ, 1489 MatSetValuesAdic_MPIAIJ, 1490 MatSetValuesAdifor_MPIAIJ 1491 }; 1492 1493 /* ----------------------------------------------------------------------------------------*/ 1494 1495 EXTERN_C_BEGIN 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1498 int MatStoreValues_MPIAIJ(Mat mat) 1499 { 1500 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1501 int ierr; 1502 1503 PetscFunctionBegin; 1504 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1505 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 EXTERN_C_END 1509 1510 EXTERN_C_BEGIN 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1513 int MatRetrieveValues_MPIAIJ(Mat mat) 1514 { 1515 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1516 int ierr; 1517 1518 PetscFunctionBegin; 1519 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1520 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 EXTERN_C_END 1524 1525 #include "petscpc.h" 1526 EXTERN_C_BEGIN 1527 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1528 EXTERN_C_END 1529 1530 EXTERN_C_BEGIN 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatCreate_MPIAIJ" 1533 int MatCreate_MPIAIJ(Mat B) 1534 { 1535 Mat_MPIAIJ *b; 1536 int ierr,i,size; 1537 1538 PetscFunctionBegin; 1539 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1540 1541 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1542 B->data = (void*)b; 1543 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1544 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1545 B->factor = 0; 1546 B->assembled = PETSC_FALSE; 1547 B->mapping = 
0; 1548 1549 B->insertmode = NOT_SET_VALUES; 1550 b->size = size; 1551 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1552 1553 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1554 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1555 1556 /* the information in the maps duplicates the information computed below, eventually 1557 we should remove the duplicate information that is not contained in the maps */ 1558 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1559 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1560 1561 /* build local table of row and column ownerships */ 1562 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1563 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1564 b->cowners = b->rowners + b->size + 2; 1565 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1566 b->rowners[0] = 0; 1567 for (i=2; i<=b->size; i++) { 1568 b->rowners[i] += b->rowners[i-1]; 1569 } 1570 b->rstart = b->rowners[b->rank]; 1571 b->rend = b->rowners[b->rank+1]; 1572 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1573 b->cowners[0] = 0; 1574 for (i=2; i<=b->size; i++) { 1575 b->cowners[i] += b->cowners[i-1]; 1576 } 1577 b->cstart = b->cowners[b->rank]; 1578 b->cend = b->cowners[b->rank+1]; 1579 1580 /* build cache for off array entries formed */ 1581 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1582 b->donotstash = PETSC_FALSE; 1583 b->colmap = 0; 1584 b->garray = 0; 1585 b->roworiented = PETSC_TRUE; 1586 1587 /* stuff used for matrix vector multiply */ 1588 b->lvec = PETSC_NULL; 1589 b->Mvctx = PETSC_NULL; 1590 1591 /* stuff for MatGetRow() */ 1592 b->rowindices = 0; 1593 b->rowvalues = 0; 1594 b->getrowactive = PETSC_FALSE; 1595 1596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1597 "MatStoreValues_MPIAIJ", 1598 
MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1600 "MatRetrieveValues_MPIAIJ", 1601 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1603 "MatGetDiagonalBlock_MPIAIJ", 1604 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1605 1606 PetscFunctionReturn(0); 1607 } 1608 EXTERN_C_END 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1612 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1613 { 1614 Mat mat; 1615 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1616 int ierr; 1617 1618 PetscFunctionBegin; 1619 *newmat = 0; 1620 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1621 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1622 a = (Mat_MPIAIJ*)mat->data; 1623 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1624 mat->factor = matin->factor; 1625 mat->assembled = PETSC_TRUE; 1626 mat->insertmode = NOT_SET_VALUES; 1627 mat->preallocated = PETSC_TRUE; 1628 1629 a->rstart = oldmat->rstart; 1630 a->rend = oldmat->rend; 1631 a->cstart = oldmat->cstart; 1632 a->cend = oldmat->cend; 1633 a->size = oldmat->size; 1634 a->rank = oldmat->rank; 1635 a->donotstash = oldmat->donotstash; 1636 a->roworiented = oldmat->roworiented; 1637 a->rowindices = 0; 1638 a->rowvalues = 0; 1639 a->getrowactive = PETSC_FALSE; 1640 1641 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1642 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1643 if (oldmat->colmap) { 1644 #if defined (PETSC_USE_CTABLE) 1645 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1646 #else 1647 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1648 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1649 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1650 #endif 1651 } else a->colmap = 0; 1652 if (oldmat->garray) { 1653 int len; 1654 len = oldmat->B->n; 1655 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1656 PetscLogObjectMemory(mat,len*sizeof(int)); 1657 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1658 } else a->garray = 0; 1659 1660 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1661 PetscLogObjectParent(mat,a->lvec); 1662 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1663 PetscLogObjectParent(mat,a->Mvctx); 1664 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1665 PetscLogObjectParent(mat,a->A); 1666 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1667 PetscLogObjectParent(mat,a->B); 1668 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1669 *newmat = mat; 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #include "petscsys.h" 1674 1675 EXTERN_C_BEGIN 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatLoad_MPIAIJ" 1678 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1679 { 1680 Mat A; 1681 PetscScalar *vals,*svals; 1682 MPI_Comm comm = ((PetscObject)viewer)->comm; 1683 MPI_Status status; 1684 int i,nz,ierr,j,rstart,rend,fd; 1685 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1686 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1687 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1688 1689 PetscFunctionBegin; 1690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1691 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1692 if (!rank) { 1693 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1694 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1695 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1696 if (header[3] < 0) { 1697 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in 
special format on disk, cannot load as MPIAIJ"); 1698 } 1699 } 1700 1701 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1702 M = header[1]; N = header[2]; 1703 /* determine ownership of all rows */ 1704 m = M/size + ((M % size) > rank); 1705 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1706 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1707 rowners[0] = 0; 1708 for (i=2; i<=size; i++) { 1709 rowners[i] += rowners[i-1]; 1710 } 1711 rstart = rowners[rank]; 1712 rend = rowners[rank+1]; 1713 1714 /* distribute row lengths to all processors */ 1715 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1716 offlens = ourlens + (rend-rstart); 1717 if (!rank) { 1718 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1719 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1720 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1721 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1722 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1723 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1724 } else { 1725 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1726 } 1727 1728 if (!rank) { 1729 /* calculate the number of nonzeros on each processor */ 1730 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1731 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1732 for (i=0; i<size; i++) { 1733 for (j=rowners[i]; j< rowners[i+1]; j++) { 1734 procsnz[i] += rowlengths[j]; 1735 } 1736 } 1737 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1738 1739 /* determine max buffer needed and allocate it */ 1740 maxnz = 0; 1741 for (i=0; i<size; i++) { 1742 maxnz = PetscMax(maxnz,procsnz[i]); 1743 } 1744 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1745 1746 /* read in my part of the matrix column indices */ 1747 nz = procsnz[0]; 1748 ierr = 
PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1749 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1750 1751 /* read in every one elses and ship off */ 1752 for (i=1; i<size; i++) { 1753 nz = procsnz[i]; 1754 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1755 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1756 } 1757 ierr = PetscFree(cols);CHKERRQ(ierr); 1758 } else { 1759 /* determine buffer space needed for message */ 1760 nz = 0; 1761 for (i=0; i<m; i++) { 1762 nz += ourlens[i]; 1763 } 1764 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1765 1766 /* receive message of column indices*/ 1767 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1768 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1769 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1770 } 1771 1772 /* determine column ownership if matrix is not square */ 1773 if (N != M) { 1774 n = N/size + ((N % size) > rank); 1775 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1776 cstart = cend - n; 1777 } else { 1778 cstart = rstart; 1779 cend = rend; 1780 n = cend - cstart; 1781 } 1782 1783 /* loop over local rows, determining number of off diagonal entries */ 1784 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1785 jj = 0; 1786 for (i=0; i<m; i++) { 1787 for (j=0; j<ourlens[i]; j++) { 1788 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1789 jj++; 1790 } 1791 } 1792 1793 /* create our matrix */ 1794 for (i=0; i<m; i++) { 1795 ourlens[i] -= offlens[i]; 1796 } 1797 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1798 A = *newmat; 1799 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1800 for (i=0; i<m; i++) { 1801 ourlens[i] += offlens[i]; 1802 } 1803 1804 if (!rank) { 1805 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1806 1807 /* read in my part of the matrix numerical values */ 1808 nz = 
procsnz[0]; 1809 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1810 1811 /* insert into matrix */ 1812 jj = rstart; 1813 smycols = mycols; 1814 svals = vals; 1815 for (i=0; i<m; i++) { 1816 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1817 smycols += ourlens[i]; 1818 svals += ourlens[i]; 1819 jj++; 1820 } 1821 1822 /* read in other processors and ship out */ 1823 for (i=1; i<size; i++) { 1824 nz = procsnz[i]; 1825 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1826 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1827 } 1828 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1829 } else { 1830 /* receive numeric values */ 1831 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1832 1833 /* receive message of values*/ 1834 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1835 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1836 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1837 1838 /* insert into matrix */ 1839 jj = rstart; 1840 smycols = mycols; 1841 svals = vals; 1842 for (i=0; i<m; i++) { 1843 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1844 smycols += ourlens[i]; 1845 svals += ourlens[i]; 1846 jj++; 1847 } 1848 } 1849 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1850 ierr = PetscFree(vals);CHKERRQ(ierr); 1851 ierr = PetscFree(mycols);CHKERRQ(ierr); 1852 ierr = PetscFree(rowners);CHKERRQ(ierr); 1853 1854 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1855 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 EXTERN_C_END 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1862 /* 1863 Not great since it makes two copies of the submatrix, first an SeqAIJ 1864 in local and then by concatenating the local matrices the end result. 
1865 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1866 */ 1867 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1868 { 1869 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1870 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1871 Mat *local,M,Mreuse; 1872 PetscScalar *vwork,*aa; 1873 MPI_Comm comm = mat->comm; 1874 Mat_SeqAIJ *aij; 1875 1876 1877 PetscFunctionBegin; 1878 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1879 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1880 1881 if (call == MAT_REUSE_MATRIX) { 1882 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1883 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1884 local = &Mreuse; 1885 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1886 } else { 1887 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1888 Mreuse = *local; 1889 ierr = PetscFree(local);CHKERRQ(ierr); 1890 } 1891 1892 /* 1893 m - number of local rows 1894 n - number of columns (same on all processors) 1895 rstart - first row in new global matrix generated 1896 */ 1897 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1898 if (call == MAT_INITIAL_MATRIX) { 1899 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1900 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1901 ii = aij->i; 1902 jj = aij->j; 1903 1904 /* 1905 Determine the number of non-zeros in the diagonal and off-diagonal 1906 portions of the matrix in order to do correct preallocation 1907 */ 1908 1909 /* first get start and end of "diagonal" columns */ 1910 if (csize == PETSC_DECIDE) { 1911 nlocal = n/size + ((n % size) > rank); 1912 } else { 1913 nlocal = csize; 1914 } 1915 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1916 rstart = rend - nlocal; 1917 if (rank == size - 1 && rend != n) { 1918 SETERRQ(1,"Local column sizes do 
not add up to total number of columns"); 1919 } 1920 1921 /* next, compute all the lengths */ 1922 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1923 olens = dlens + m; 1924 for (i=0; i<m; i++) { 1925 jend = ii[i+1] - ii[i]; 1926 olen = 0; 1927 dlen = 0; 1928 for (j=0; j<jend; j++) { 1929 if (*jj < rstart || *jj >= rend) olen++; 1930 else dlen++; 1931 jj++; 1932 } 1933 olens[i] = olen; 1934 dlens[i] = dlen; 1935 } 1936 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1937 ierr = PetscFree(dlens);CHKERRQ(ierr); 1938 } else { 1939 int ml,nl; 1940 1941 M = *newmat; 1942 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1943 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1944 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1945 /* 1946 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1947 rather than the slower MatSetValues(). 1948 */ 1949 M->was_assembled = PETSC_TRUE; 1950 M->assembled = PETSC_FALSE; 1951 } 1952 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1953 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1954 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1955 ii = aij->i; 1956 jj = aij->j; 1957 aa = aij->a; 1958 for (i=0; i<m; i++) { 1959 row = rstart + i; 1960 nz = ii[i+1] - ii[i]; 1961 cwork = jj; jj += nz; 1962 vwork = aa; aa += nz; 1963 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1964 } 1965 1966 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 *newmat = M; 1969 1970 /* save submatrix used in processor for next request */ 1971 if (call == MAT_INITIAL_MATRIX) { 1972 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1973 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1974 } 1975 1976 PetscFunctionReturn(0); 1977 } 1978 1979 
#undef __FUNCT__ 1980 #define __FUNCT__ "MatMPIAIJSetPreallocation" 1981 /*@C 1982 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 1983 (the default parallel PETSc format). For good matrix assembly performance 1984 the user should preallocate the matrix storage by setting the parameters 1985 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1986 performance can be increased by more than a factor of 50. 1987 1988 Collective on MPI_Comm 1989 1990 Input Parameters: 1991 + A - the matrix 1992 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1993 (same value is used for all local rows) 1994 . d_nnz - array containing the number of nonzeros in the various rows of the 1995 DIAGONAL portion of the local submatrix (possibly different for each row) 1996 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1997 The size of this array is equal to the number of local rows, i.e 'm'. 1998 You must leave room for the diagonal entry even if it is zero. 1999 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2000 submatrix (same value is used for all local rows). 2001 - o_nnz - array containing the number of nonzeros in the various rows of the 2002 OFF-DIAGONAL portion of the local submatrix (possibly different for 2003 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2004 structure. The size of this array is equal to the number 2005 of local rows, i.e 'm'. 2006 2007 The AIJ format (also called the Yale sparse matrix format or 2008 compressed row storage), is fully compatible with standard Fortran 77 2009 storage. That is, the stored row and column indices can begin at 2010 either one (as in Fortran) or zero. See the users manual for details. 2011 2012 The user MUST specify either the local or global matrix dimensions 2013 (possibly both). 
2014 2015 The parallel matrix is partitioned such that the first m0 rows belong to 2016 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2017 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2018 2019 The DIAGONAL portion of the local submatrix of a processor can be defined 2020 as the submatrix which is obtained by extraction the part corresponding 2021 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2022 first row that belongs to the processor, and r2 is the last row belonging 2023 to the this processor. This is a square mxm matrix. The remaining portion 2024 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2025 2026 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2027 2028 By default, this format uses inodes (identical nodes) when possible. 2029 We search for consecutive rows with the same nonzero structure, thereby 2030 reusing matrix information to achieve increased efficiency. 2031 2032 Options Database Keys: 2033 + -mat_aij_no_inode - Do not use inodes 2034 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2035 - -mat_aij_oneindex - Internally use indexing starting at 1 2036 rather than 0. Note that when calling MatSetValues(), 2037 the user still MUST index entries starting at 0! 2038 2039 Example usage: 2040 2041 Consider the following 8x8 matrix with 34 non-zero values, that is 2042 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2043 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 2044 as follows: 2045 2046 .vb 2047 1 2 0 | 0 3 0 | 0 4 2048 Proc0 0 5 6 | 7 0 0 | 8 0 2049 9 0 10 | 11 0 0 | 12 0 2050 ------------------------------------- 2051 13 0 14 | 15 16 17 | 0 0 2052 Proc1 0 18 0 | 19 20 21 | 0 0 2053 0 0 0 | 22 23 0 | 24 0 2054 ------------------------------------- 2055 Proc2 25 26 27 | 0 0 28 | 29 0 2056 30 0 0 | 31 32 33 | 0 34 2057 .ve 2058 2059 This can be represented as a collection of submatrices as: 2060 2061 .vb 2062 A B C 2063 D E F 2064 G H I 2065 .ve 2066 2067 Where the submatrices A,B,C are owned by proc0, D,E,F are 2068 owned by proc1, G,H,I are owned by proc2. 2069 2070 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2071 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2072 The 'M','N' parameters are 8,8, and have the same values on all procs. 2073 2074 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2075 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2076 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2077 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2078 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2079 matrix, ans [DF] as another SeqAIJ matrix. 2080 2081 When d_nz, o_nz parameters are specified, d_nz storage elements are 2082 allocated for every row of the local diagonal submatrix, and o_nz 2083 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2084 One way to choose d_nz and o_nz is to use the max nonzerors per local 2085 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2086 In this case, the values of d_nz,o_nz are: 2087 .vb 2088 proc0 : dnz = 2, o_nz = 2 2089 proc1 : dnz = 3, o_nz = 2 2090 proc2 : dnz = 1, o_nz = 4 2091 .ve 2092 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2093 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2094 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 2095 34 values. 2096 2097 When d_nnz, o_nnz parameters are specified, the storage is specified 2098 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2099 In the above case the values for d_nnz,o_nnz are: 2100 .vb 2101 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2102 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2103 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2104 .ve 2105 Here the space allocated is sum of all the above values i.e 34, and 2106 hence pre-allocation is perfect. 2107 2108 Level: intermediate 2109 2110 .keywords: matrix, aij, compressed row, sparse, parallel 2111 2112 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2113 @*/ 2114 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2115 { 2116 Mat_MPIAIJ *b; 2117 int ierr,i; 2118 PetscTruth flg2; 2119 2120 PetscFunctionBegin; 2121 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2122 if (!flg2) PetscFunctionReturn(0); 2123 B->preallocated = PETSC_TRUE; 2124 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2125 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2126 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2127 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2128 if (d_nnz) { 2129 for (i=0; i<B->m; i++) { 2130 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2131 } 2132 } 2133 if (o_nnz) { 2134 for (i=0; i<B->m; i++) { 2135 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2136 } 2137 } 2138 b = (Mat_MPIAIJ*)B->data; 2139 2140 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2141 PetscLogObjectParent(B,b->A); 2142 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2143 PetscLogObjectParent(B,b->B); 2144 2145 PetscFunctionReturn(0); 2146 } 2147 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatCreateMPIAIJ" 2150 /*@C 2151 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2152 (the default parallel PETSc format). For good matrix assembly performance 2153 the user should preallocate the matrix storage by setting the parameters 2154 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2155 performance can be increased by more than a factor of 50. 2156 2157 Collective on MPI_Comm 2158 2159 Input Parameters: 2160 + comm - MPI communicator 2161 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2162 This value should be the same as the local size used in creating the 2163 y vector for the matrix-vector product y = Ax. 2164 . n - This value should be the same as the local size used in creating the 2165 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2166 calculated if N is given) For square matrices n is almost always m. 2167 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2168 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2169 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2170 (same value is used for all local rows) 2171 . d_nnz - array containing the number of nonzeros in the various rows of the 2172 DIAGONAL portion of the local submatrix (possibly different for each row) 2173 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2174 The size of this array is equal to the number of local rows, i.e 'm'. 2175 You must leave room for the diagonal entry even if it is zero. 2176 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2177 submatrix (same value is used for all local rows). 
2178 - o_nnz - array containing the number of nonzeros in the various rows of the 2179 OFF-DIAGONAL portion of the local submatrix (possibly different for 2180 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2181 structure. The size of this array is equal to the number 2182 of local rows, i.e 'm'. 2183 2184 Output Parameter: 2185 . A - the matrix 2186 2187 Notes: 2188 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2189 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2190 storage requirements for this matrix. 2191 2192 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2193 processor than it must be used on all processors that share the object for 2194 that argument. 2195 2196 The AIJ format (also called the Yale sparse matrix format or 2197 compressed row storage), is fully compatible with standard Fortran 77 2198 storage. That is, the stored row and column indices can begin at 2199 either one (as in Fortran) or zero. See the users manual for details. 2200 2201 The user MUST specify either the local or global matrix dimensions 2202 (possibly both). 2203 2204 The parallel matrix is partitioned such that the first m0 rows belong to 2205 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2206 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2207 2208 The DIAGONAL portion of the local submatrix of a processor can be defined 2209 as the submatrix which is obtained by extraction the part corresponding 2210 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2211 first row that belongs to the processor, and r2 is the last row belonging 2212 to the this processor. This is a square mxm matrix. The remaining portion 2213 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2214 2215 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
2216 2217 By default, this format uses inodes (identical nodes) when possible. 2218 We search for consecutive rows with the same nonzero structure, thereby 2219 reusing matrix information to achieve increased efficiency. 2220 2221 Options Database Keys: 2222 + -mat_aij_no_inode - Do not use inodes 2223 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2224 - -mat_aij_oneindex - Internally use indexing starting at 1 2225 rather than 0. Note that when calling MatSetValues(), 2226 the user still MUST index entries starting at 0! 2227 2228 2229 Example usage: 2230 2231 Consider the following 8x8 matrix with 34 non-zero values, that is 2232 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2233 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2234 as follows: 2235 2236 .vb 2237 1 2 0 | 0 3 0 | 0 4 2238 Proc0 0 5 6 | 7 0 0 | 8 0 2239 9 0 10 | 11 0 0 | 12 0 2240 ------------------------------------- 2241 13 0 14 | 15 16 17 | 0 0 2242 Proc1 0 18 0 | 19 20 21 | 0 0 2243 0 0 0 | 22 23 0 | 24 0 2244 ------------------------------------- 2245 Proc2 25 26 27 | 0 0 28 | 29 0 2246 30 0 0 | 31 32 33 | 0 34 2247 .ve 2248 2249 This can be represented as a collection of submatrices as: 2250 2251 .vb 2252 A B C 2253 D E F 2254 G H I 2255 .ve 2256 2257 Where the submatrices A,B,C are owned by proc0, D,E,F are 2258 owned by proc1, G,H,I are owned by proc2. 2259 2260 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2261 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2262 The 'M','N' parameters are 8,8, and have the same values on all procs. 2263 2264 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2265 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2266 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2267 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2268 part as SeqAIJ matrices. 
e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local 
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
2297 2298 Level: intermediate 2299 2300 .keywords: matrix, aij, compressed row, sparse, parallel 2301 2302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2303 @*/ 2304 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2305 { 2306 int ierr,size; 2307 2308 PetscFunctionBegin; 2309 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2310 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2311 if (size > 1) { 2312 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2313 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2314 } else { 2315 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2316 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2317 } 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2323 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2324 { 2325 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2326 PetscFunctionBegin; 2327 *Ad = a->A; 2328 *Ao = a->B; 2329 *colmap = a->garray; 2330 PetscFunctionReturn(0); 2331 } 2332 2333 #undef __FUNCT__ 2334 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2335 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2336 { 2337 int ierr; 2338 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2339 2340 PetscFunctionBegin; 2341 if (coloring->ctype == IS_COLORING_LOCAL) { 2342 int *allcolors,*colors,i; 2343 ISColoring ocoloring; 2344 2345 /* set coloring for diagonal portion */ 2346 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2347 2348 /* set coloring for off-diagonal portion */ 2349 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2350 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2351 for (i=0; i<a->B->n; i++) { 2352 colors[i] = allcolors[a->garray[i]]; 2353 } 2354 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2355 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2356 ierr = 
MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2357 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2358 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2359 int *colors,i,*larray; 2360 ISColoring ocoloring; 2361 2362 /* set coloring for diagonal portion */ 2363 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2364 for (i=0; i<a->A->n; i++) { 2365 larray[i] = i + a->cstart; 2366 } 2367 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2368 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2369 for (i=0; i<a->A->n; i++) { 2370 colors[i] = coloring->colors[larray[i]]; 2371 } 2372 ierr = PetscFree(larray);CHKERRQ(ierr); 2373 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2374 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2375 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2376 2377 /* set coloring for off-diagonal portion */ 2378 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2379 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2380 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2381 for (i=0; i<a->B->n; i++) { 2382 colors[i] = coloring->colors[larray[i]]; 2383 } 2384 ierr = PetscFree(larray);CHKERRQ(ierr); 2385 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2386 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2387 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2388 } else { 2389 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2390 } 2391 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2397 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2398 { 2399 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2400 int ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = 
MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2404 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2410 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2411 { 2412 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2413 int ierr; 2414 2415 PetscFunctionBegin; 2416 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2417 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatFileMerge" 2423 /*@C 2424 MatFileMerge - Saves a single large PETSc matrix by reading in 2425 sequential matrices from each processor and concatinating them 2426 together. 2427 2428 Collective on MPI_Comm 2429 2430 Input Parameters: 2431 + comm - the communicators the individual files are on 2432 . infile - name of input file 2433 - outfile - name of output file 2434 2435 Notes: The number of columns of the matrix in EACH of the seperate files 2436 MUST be the same. 
2437 2438 The true name of the input files on each processor is obtained by 2439 appending .rank onto the infile string; where rank is 0 up to one less 2440 then the number of processors 2441 @*/ 2442 int MatFileMerge(MPI_Comm comm,char *infile,char *outfile) 2443 { 2444 int ierr,rank,len,m,n,i,rstart,*indx,nnz,I; 2445 PetscViewer in,out; 2446 char *name; 2447 Mat A,B; 2448 PetscScalar *values; 2449 2450 PetscFunctionBegin; 2451 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2452 ierr = PetscStrlen(infile,&len);CHKERRQ(ierr); 2453 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2454 sprintf(name,"%s.%d",infile,rank); 2455 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_RDONLY,&in);CHKERRQ(ierr); 2456 ierr = PetscFree(name); 2457 ierr = MatLoad(in,MATSEQAIJ,&A);CHKERRQ(ierr); 2458 ierr = PetscViewerDestroy(in);CHKERRQ(ierr); 2459 2460 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 2461 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DETERMINE,n,0,0,0,0,&B);CHKERRQ(ierr); 2462 ierr = MatGetOwnershipRange(B,&rstart,0);CHKERRQ(ierr); 2463 for (i=0;i<m;i++) { 2464 ierr = MatGetRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr); 2465 I = i + rstart; 2466 ierr = MatSetValues(B,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2467 ierr = MatRestoreRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr); 2468 } 2469 ierr = MatDestroy(A);CHKERRQ(ierr); 2470 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2471 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2472 2473 ierr = PetscViewerBinaryOpen(comm,outfile,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2474 ierr = MatView(B,out);CHKERRQ(ierr); 2475 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2476 ierr = MatDestroy(B);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatFileSplit" 2482 int MatFileSplit(Mat A,char *outfile) 2483 { 2484 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2485 PetscViewer out; 2486 char *name; 2487 Mat B; 2488 PetscScalar 
*values; 2489 2490 PetscFunctionBegin; 2491 2492 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2493 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2494 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2495 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2496 for (i=0;i<m;i++) { 2497 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2498 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2499 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2500 } 2501 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2503 2504 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2505 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2506 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2507 sprintf(name,"%s.%d",outfile,rank); 2508 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2509 ierr = PetscFree(name); 2510 ierr = MatView(B,out);CHKERRQ(ierr); 2511 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2512 ierr = MatDestroy(B);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515