1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat); 14 EXTERN int MatUseSpooles_MPIAIJ(Mat); 15 16 /* 17 Local utility routine that creates a mapping from the global column 18 number to the local number in the off-diagonal part of the local 19 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 20 a slightly higher hash table cost; without it it is not scalable (each processor 21 has an order N integer array but is fast to acess. 22 */ 23 #undef __FUNCT__ 24 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 25 int CreateColmap_MPIAIJ_Private(Mat mat) 26 { 27 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 28 int n = aij->B->n,i,ierr; 29 30 PetscFunctionBegin; 31 #if defined (PETSC_USE_CTABLE) 32 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 33 for (i=0; i<n; i++){ 34 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 35 } 36 #else 37 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 38 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 39 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 40 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 41 #endif 42 PetscFunctionReturn(0); 43 } 44 45 #define CHUNKSIZE 15 46 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 47 { \ 48 \ 49 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 50 rmax = aimax[row]; nrow = ailen[row]; \ 51 col1 = col - shift; \ 52 \ 53 low = 0; high = nrow; \ 54 while (high-low > 5) { \ 55 t = (low+high)/2; \ 56 if 
(rp[t] > col) high = t; \ 57 else low = t; \ 58 } \ 59 for (_i=low; _i<high; _i++) { \ 60 if (rp[_i] > col1) break; \ 61 if (rp[_i] == col1) { \ 62 if (addv == ADD_VALUES) ap[_i] += value; \ 63 else ap[_i] = value; \ 64 goto a_noinsert; \ 65 } \ 66 } \ 67 if (nonew == 1) goto a_noinsert; \ 68 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 69 if (nrow >= rmax) { \ 70 /* there is no extra room in row, therefore enlarge */ \ 71 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 72 PetscScalar *new_a; \ 73 \ 74 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 75 \ 76 /* malloc new storage space */ \ 77 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 78 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 79 new_j = (int*)(new_a + new_nz); \ 80 new_i = new_j + new_nz; \ 81 \ 82 /* copy over old data into new slots */ \ 83 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 84 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 85 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 86 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 87 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 88 len*sizeof(int));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 91 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 92 /* free up old matrix storage */ \ 93 \ 94 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 95 if (!a->singlemalloc) { \ 96 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 97 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 98 } \ 99 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 100 a->singlemalloc = PETSC_TRUE; \ 101 \ 102 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 103 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 104 
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 105 a->maxnz += CHUNKSIZE; \ 106 a->reallocs++; \ 107 } \ 108 N = nrow++ - 1; a->nz++; \ 109 /* shift up all the later entries in this row */ \ 110 for (ii=N; ii>=_i; ii--) { \ 111 rp[ii+1] = rp[ii]; \ 112 ap[ii+1] = ap[ii]; \ 113 } \ 114 rp[_i] = col1; \ 115 ap[_i] = value; \ 116 a_noinsert: ; \ 117 ailen[row] = nrow; \ 118 } 119 120 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 121 { \ 122 \ 123 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 124 rmax = bimax[row]; nrow = bilen[row]; \ 125 col1 = col - shift; \ 126 \ 127 low = 0; high = nrow; \ 128 while (high-low > 5) { \ 129 t = (low+high)/2; \ 130 if (rp[t] > col) high = t; \ 131 else low = t; \ 132 } \ 133 for (_i=low; _i<high; _i++) { \ 134 if (rp[_i] > col1) break; \ 135 if (rp[_i] == col1) { \ 136 if (addv == ADD_VALUES) ap[_i] += value; \ 137 else ap[_i] = value; \ 138 goto b_noinsert; \ 139 } \ 140 } \ 141 if (nonew == 1) goto b_noinsert; \ 142 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 143 if (nrow >= rmax) { \ 144 /* there is no extra room in row, therefore enlarge */ \ 145 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 146 PetscScalar *new_a; \ 147 \ 148 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 149 \ 150 /* malloc new storage space */ \ 151 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 152 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 153 new_j = (int*)(new_a + new_nz); \ 154 new_i = new_j + new_nz; \ 155 \ 156 /* copy over old data into new slots */ \ 157 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 158 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 159 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 160 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 161 ierr = 
PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 162 len*sizeof(int));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 165 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 166 /* free up old matrix storage */ \ 167 \ 168 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 169 if (!b->singlemalloc) { \ 170 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 171 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 172 } \ 173 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 174 b->singlemalloc = PETSC_TRUE; \ 175 \ 176 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 177 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 178 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 179 b->maxnz += CHUNKSIZE; \ 180 b->reallocs++; \ 181 } \ 182 N = nrow++ - 1; b->nz++; \ 183 /* shift up all the later entries in this row */ \ 184 for (ii=N; ii>=_i; ii--) { \ 185 rp[ii+1] = rp[ii]; \ 186 ap[ii+1] = ap[ii]; \ 187 } \ 188 rp[_i] = col1; \ 189 ap[_i] = value; \ 190 b_noinsert: ; \ 191 bilen[row] = nrow; \ 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatSetValues_MPIAIJ" 196 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 197 { 198 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 199 PetscScalar value; 200 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 201 int cstart = aij->cstart,cend = aij->cend,row,col; 202 PetscTruth roworiented = aij->roworiented; 203 204 /* Some Variables required in the macro */ 205 Mat A = aij->A; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 207 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 208 PetscScalar *aa = a->a; 209 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 210 Mat B = aij->B; 211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 212 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = 
b->j,bm = aij->B->m,am = aij->A->m; 213 PetscScalar *ba = b->a; 214 215 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 216 int nonew = a->nonew,shift = a->indexshift; 217 PetscScalar *ap; 218 219 PetscFunctionBegin; 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_BOPT_g) 223 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 224 #endif 225 if (im[i] >= rstart && im[i] < rend) { 226 row = im[i] - rstart; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart && in[j] < cend){ 229 col = in[j] - cstart; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 if (ignorezeroentries && value == 0.0) continue; 232 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 233 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 234 } else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_BOPT_g) 236 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!aij->colmap) { 241 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined (PETSC_USE_CTABLE) 244 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 245 col--; 246 #else 247 col = aij->colmap[in[j]] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 250 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 253 B = aij->B; 254 b = (Mat_SeqAIJ*)B->data; 255 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 256 ba = b->a; 257 } 258 } else col = in[j]; 259 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 260 if (ignorezeroentries && value == 0.0) continue; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (!aij->donotstash) { 267 if (roworiented) { 268 if (ignorezeroentries && v[i*n] 
== 0.0) continue; 269 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 270 } else { 271 if (ignorezeroentries && v[i] == 0.0) continue; 272 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatGetValues_MPIAIJ" 282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 283 { 284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 285 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 286 int cstart = aij->cstart,cend = aij->cend,row,col; 287 288 PetscFunctionBegin; 289 for (i=0; i<m; i++) { 290 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 291 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 292 if (idxm[i] >= rstart && idxm[i] < rend) { 293 row = idxm[i] - rstart; 294 for (j=0; j<n; j++) { 295 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 296 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 297 if (idxn[j] >= cstart && idxn[j] < cend){ 298 col = idxn[j] - cstart; 299 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 300 } else { 301 if (!aij->colmap) { 302 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 303 } 304 #if defined (PETSC_USE_CTABLE) 305 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 306 col --; 307 #else 308 col = aij->colmap[idxn[j]] - 1; 309 #endif 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 313 } 314 } 315 } 316 } else { 317 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 318 } 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 326 { 327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 328 int 
ierr,nstash,reallocs; 329 InsertMode addv; 330 331 PetscFunctionBegin; 332 if (aij->donotstash) { 333 PetscFunctionReturn(0); 334 } 335 336 /* make sure all processors are either in INSERTMODE or ADDMODE */ 337 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 338 if (addv == (ADD_VALUES|INSERT_VALUES)) { 339 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 340 } 341 mat->insertmode = addv; /* in case this processor had no cache */ 342 343 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 344 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 345 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 346 PetscFunctionReturn(0); 347 } 348 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 353 { 354 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 355 int i,j,rstart,ncols,n,ierr,flg; 356 int *row,*col,other_disassembled; 357 PetscScalar *val; 358 InsertMode addv = mat->insertmode; 359 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 360 PetscTruth flag; 361 #endif 362 363 PetscFunctionBegin; 364 if (!aij->donotstash) { 365 while (1) { 366 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 367 if (!flg) break; 368 369 for (i=0; i<n;) { 370 /* Now identify the consecutive vals belonging to the same row */ 371 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 372 if (j < n) ncols = j-i; 373 else ncols = n-i; 374 /* Now assemble all these values with a single function call */ 375 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 376 i = j; 377 } 378 } 379 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 380 } 381 382 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 383 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 
384 385 /* determine if any processor has disassembled, if so we must 386 also disassemble ourselfs, in order that we may reassemble. */ 387 /* 388 if nonzero structure of submatrix B cannot change then we know that 389 no processor disassembled thus we can skip this stuff 390 */ 391 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 392 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 393 if (mat->was_assembled && !other_disassembled) { 394 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 } 397 398 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 399 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 400 } 401 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 402 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 403 404 if (aij->rowvalues) { 405 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 406 aij->rowvalues = 0; 407 } 408 #if defined(PETSC_HAVE_SUPERLUDIST) 409 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 410 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 411 #endif 412 413 #if defined(PETSC_HAVE_SPOOLES) 414 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 415 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 416 #endif 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 422 int MatZeroEntries_MPIAIJ(Mat A) 423 { 424 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 425 int ierr; 426 427 PetscFunctionBegin; 428 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 429 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatZeroRows_MPIAIJ" 435 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 436 { 437 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 438 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 439 int *procs,*nprocs,j,idx,nsends,*work,row; 440 int 
nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 441 int *rvalues,tag = A->tag,count,base,slen,n,*source; 442 int *lens,imdex,*lrows,*values,rstart=l->rstart; 443 MPI_Comm comm = A->comm; 444 MPI_Request *send_waits,*recv_waits; 445 MPI_Status recv_status,*send_status; 446 IS istmp; 447 PetscTruth found; 448 449 PetscFunctionBegin; 450 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 451 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 452 453 /* first count number of contributors to each processor */ 454 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 455 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 456 procs = nprocs + size; 457 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 458 for (i=0; i<N; i++) { 459 idx = rows[i]; 460 found = PETSC_FALSE; 461 for (j=0; j<size; j++) { 462 if (idx >= owners[j] && idx < owners[j+1]) { 463 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 464 } 465 } 466 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 467 } 468 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 469 470 /* inform other processors of number of messages and max length*/ 471 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 472 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 473 nrecvs = work[size+rank]; 474 nmax = work[rank]; 475 ierr = PetscFree(work);CHKERRQ(ierr); 476 477 /* post receives: */ 478 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 479 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 480 for (i=0; i<nrecvs; i++) { 481 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 482 } 483 484 /* do sends: 485 1) starts[i] gives the starting index in svalues for stuff going to 486 the ith processor 487 */ 488 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 489 ierr = 
PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 490 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 491 starts[0] = 0; 492 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 493 for (i=0; i<N; i++) { 494 svalues[starts[owner[i]]++] = rows[i]; 495 } 496 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 497 498 starts[0] = 0; 499 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 500 count = 0; 501 for (i=0; i<size; i++) { 502 if (procs[i]) { 503 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 504 } 505 } 506 ierr = PetscFree(starts);CHKERRQ(ierr); 507 508 base = owners[rank]; 509 510 /* wait on receives */ 511 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 512 source = lens + nrecvs; 513 count = nrecvs; slen = 0; 514 while (count) { 515 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 516 /* unpack receives into our local space */ 517 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 518 source[imdex] = recv_status.MPI_SOURCE; 519 lens[imdex] = n; 520 slen += n; 521 count--; 522 } 523 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 524 525 /* move the data into the send scatter */ 526 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 527 count = 0; 528 for (i=0; i<nrecvs; i++) { 529 values = rvalues + i*nmax; 530 for (j=0; j<lens[i]; j++) { 531 lrows[count++] = values[j] - base; 532 } 533 } 534 ierr = PetscFree(rvalues);CHKERRQ(ierr); 535 ierr = PetscFree(lens);CHKERRQ(ierr); 536 ierr = PetscFree(owner);CHKERRQ(ierr); 537 ierr = PetscFree(nprocs);CHKERRQ(ierr); 538 539 /* actually zap the local rows */ 540 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 541 PetscLogObjectParent(A,istmp); 542 543 /* 544 Zero the required rows. 
If the "diagonal block" of the matrix 545 is square and the user wishes to set the diagonal we use seperate 546 code so that MatSetValues() is not called for each diagonal allocating 547 new memory, thus calling lots of mallocs and slowing things down. 548 549 Contributed by: Mathew Knepley 550 */ 551 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 552 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 553 if (diag && (l->A->M == l->A->N)) { 554 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 555 } else if (diag) { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 558 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 559 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 560 } 561 for (i = 0; i < slen; i++) { 562 row = lrows[i] + rstart; 563 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 564 } 565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 567 } else { 568 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 569 } 570 ierr = ISDestroy(istmp);CHKERRQ(ierr); 571 ierr = PetscFree(lrows);CHKERRQ(ierr); 572 573 /* wait on sends */ 574 if (nsends) { 575 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 576 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 577 ierr = PetscFree(send_status);CHKERRQ(ierr); 578 } 579 ierr = PetscFree(send_waits);CHKERRQ(ierr); 580 ierr = PetscFree(svalues);CHKERRQ(ierr); 581 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatMult_MPIAIJ" 587 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 588 { 589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 590 int ierr,nt; 591 592 PetscFunctionBegin; 593 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 594 if (nt != A->n) { 595 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of 
A (%d) and xx (%d)",A->n,nt); 596 } 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultAdd_MPIAIJ" 606 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 int ierr; 610 611 PetscFunctionBegin; 612 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 613 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 614 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 615 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 621 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 622 { 623 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 624 int ierr; 625 626 PetscFunctionBegin; 627 /* do nondiagonal part */ 628 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 629 /* send it on its way */ 630 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 /* do local part */ 632 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 633 /* receive remote parts: note this assumes the values are not actually */ 634 /* inserted in yy until the next line, which is true for my implementation*/ 635 /* but is not perhaps always true. 
*/ 636 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 643 { 644 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 645 int ierr; 646 647 PetscFunctionBegin; 648 /* do nondiagonal part */ 649 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 650 /* send it on its way */ 651 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 /* do local part */ 653 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 654 /* receive remote parts: note this assumes the values are not actually */ 655 /* inserted in yy until the next line, which is true for my implementation*/ 656 /* but is not perhaps always true. */ 657 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 /* 662 This only works correctly for square matrices where the subblock A->A is the 663 diagonal block 664 */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 668 { 669 int ierr; 670 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 671 672 PetscFunctionBegin; 673 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 674 if (a->rstart != a->cstart || a->rend != a->cend) { 675 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 676 } 677 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatScale_MPIAIJ" 683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 684 { 685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 686 int ierr; 687 688 PetscFunctionBegin; 689 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 690 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define 
__FUNCT__ "MatDestroy_MPIAIJ" 696 int MatDestroy_MPIAIJ(Mat mat) 697 { 698 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 699 int ierr; 700 701 PetscFunctionBegin; 702 #if defined(PETSC_USE_LOG) 703 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 704 #endif 705 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 706 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 708 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 709 #if defined (PETSC_USE_CTABLE) 710 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 711 #else 712 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 713 #endif 714 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 715 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 716 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 717 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 718 ierr = PetscFree(aij);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 723 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 731 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 732 PetscTruth isdraw,isascii,flg; 733 PetscViewer sviewer; 734 PetscViewerFormat format; 735 736 PetscFunctionBegin; 737 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 738 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 739 if (isascii) { 740 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 741 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 742 MatInfo info; 743 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 744 ierr = 
MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 745 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 746 if (flg) { 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 748 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 749 } else { 750 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 751 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 752 } 753 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 754 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 755 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 756 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 758 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } else if (format == PETSC_VIEWER_ASCII_INFO) { 761 PetscFunctionReturn(0); 762 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 763 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 764 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 765 #endif 766 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 767 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 768 #endif 769 770 PetscFunctionReturn(0); 771 } 772 } else if (isdraw) { 773 PetscDraw draw; 774 PetscTruth isnull; 775 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 776 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 777 } 778 779 if (size == 1) { 780 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 781 ierr = 
MatView(aij->A,viewer);CHKERRQ(ierr); 782 } else { 783 /* assemble the entire matrix onto first processor. */ 784 Mat A; 785 Mat_SeqAIJ *Aloc; 786 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 787 PetscScalar *a; 788 789 if (!rank) { 790 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 791 } else { 792 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 793 } 794 PetscLogObjectParent(mat,A); 795 796 /* copy over the A part */ 797 Aloc = (Mat_SeqAIJ*)aij->A->data; 798 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 799 row = aij->rstart; 800 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 801 for (i=0; i<m; i++) { 802 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 803 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 804 } 805 aj = Aloc->j; 806 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 807 808 /* copy over the B part */ 809 Aloc = (Mat_SeqAIJ*)aij->B->data; 810 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 811 row = aij->rstart; 812 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 813 ct = cols; 814 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 815 for (i=0; i<m; i++) { 816 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 817 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 818 } 819 ierr = PetscFree(ct);CHKERRQ(ierr); 820 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 /* 823 Everyone has to call to draw the matrix since the graphics waits are 824 synchronized across all processors that share the PetscDraw object 825 */ 826 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 827 if (!rank) { 828 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 829 ierr = 
MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); /* completes the "ierr =" begun above: view rank 0's local block through the singleton viewer */
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/*
   MatView_MPIAIJ - Viewer dispatch for parallel AIJ matrices.  ASCII, draw,
   binary and socket viewers are all forwarded to
   MatView_MPIAIJ_ASCIIorDraworSocket(); any other viewer type is an error.
*/
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - "Local" SOR/relaxation for a parallel AIJ matrix.

   Each outer iteration scatters the current solution xx into mat->lvec,
   forms bb1 = bb - B*x (B is the off-diagonal block; lvec is negated first
   so multadd produces the subtraction), and then runs lits sweeps of the
   sequential relaxation on the diagonal block A.  Only the SOR_LOCAL_*
   variants are supported; true parallel SOR is refused.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;                 /* work vector holding bb - B*x */
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep works directly on bb; off-process part is zero anyway */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): here the local-iterations slot receives PETSC_NULL, while
         the symmetric branch above passes lits — confirm this asymmetry is intended */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - Sums the statistics of the diagonal (A) and
   off-diagonal (B) sequential blocks, then either reports the local totals
   or reduces them (max or sum) over the matrix communicator.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];   /* nz_used, nz_allocated, nz_unneeded, memory, mallocs */

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;  /* set from the GLOBAL column count N, not n */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies an option to the parallel matrix, forwarding
   storage-related options to both sequential blocks (body continues below).
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ
*a = (Mat_MPIAIJ*)A->data;   /* completes the declaration begun on the previous line */
  int ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that affect the sequential storage: forward to both blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the parallel matrix,
   merging the diagonal-block (A) and off-diagonal-block (B) entries into a
   single list sorted by global column number.  B's local column indices are
   translated to global ones through mat->garray.  Only one row may be
   active at a time (guarded by getrowactive); results live in the
   lazily-allocated rowvalues/rowindices scratch arrays.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one buffer holds both values and (after the values) the indices */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  /* NOTE(review): no trailing ';' after SETERRQ here — presumably the macro
     expands to a complete statement; confirm against petscerror.h */
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;   /* number of B entries with global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr =
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); /* completes the "ierr =" begun above */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ();
   only clears the getrowactive flag (the scratch buffers are kept for reuse).
*/
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Frobenius, 1- (max column sum) and infinity- (max row sum)
   norms of a parallel AIJ matrix.  Local contributions from the A and B
   blocks are combined with MPI_Allreduce.  The 2-norm is not supported.
   Note: 'shift' is the SeqAIJ index shift (0- vs 1-based CSR indices).
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential norm */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-(global)column absolute sums, then reduce; note this
         uses two length-N arrays on every process, so it is not scalable in N */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* B stores compressed local columns; garray maps them to global */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Builds the transpose by inserting the rows of the
   local A and B blocks as columns of a new parallel matrix B (with row/column
   sizes swapped), then assembling.  In-place (matout == PETSC_NULL) is only
   allowed for square matrices.  (Body continues below.)
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part: temporarily shift A's column indices to global */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr =
MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); /* completes the "ierr =" above: insert row i of A as column 'row' of B */
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* undo the temporary global shift of A's column indices */
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct = cols;   /* keep the base pointer: 'cols' is advanced in the loop below */
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr).  The scatter of
   rr into lvec is overlapped with the left-scaling of the B block and the
   scaling of the diagonal block.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ help text, on rank 0 only. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ matrices always have block size 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Two parallel AIJ matrices are equal iff the local A and B
   blocks are equal on every process (combined with a logical-AND reduction).
*/
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* NOTE(review): reduces a PetscTruth through MPI_INT — assumes PetscTruth
     is int-sized; confirm */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B.  The fast block-by-block path is only
   valid for SAME_NONZERO_PATTERN between two MPIAIJ matrices.
*/
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* MatSetUpPreallocation_MPIAIJ - Default preallocation when the user gave none. */
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
#endif

#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Y = a*X + Y.  With SAME_NONZERO_PATTERN the nonzero
   arrays line up and a single BLAS axpy per block suffices; otherwise fall
   back to the generic implementation.
*/
int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
{
  int        ierr,one;   /* NOTE(review): 'one' is used as the BLAS increment but is
                            never initialized — likely should be set to 1; confirm */
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type; a 0 entry means the
   operation is not supported.  Entries are positional (struct _MatOps). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves the numerical values of both blocks. */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the values saved by MatStoreValues. */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Type constructor: allocates the Mat_MPIAIJ context,
   installs the function table, splits ownership of rows/columns, and builds
   the rowners/cowners ownership tables.  (Body continues below.)
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   =
0;                     /* completes: B->mapping = 0 */

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;   /* cowners shares the same allocation */
  /* prefix-sum the gathered local sizes into ownership ranges */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register type-specific operations queried by name elsewhere */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Deep-copies a parallel AIJ matrix: ownership data,
   column map, garray, the communication scatter, and (per cpvalues) the two
   sequential blocks.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;   /* garray has one entry per off-diagonal column */
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix in PETSc binary format.  Rank 0 reads the
   header, the row lengths and the data, scatters the row lengths to all
   processes, and ships each process its column indices and values; every
   process counts its diagonal/off-diagonal entries for exact preallocation
   before inserting.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);   /* second half of the same allocation */
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: pass diagonal-only lengths, then restore totals */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
1867 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1868 */ 1869 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1870 { 1871 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1872 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1873 Mat *local,M,Mreuse; 1874 PetscScalar *vwork,*aa; 1875 MPI_Comm comm = mat->comm; 1876 Mat_SeqAIJ *aij; 1877 1878 1879 PetscFunctionBegin; 1880 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1881 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1882 1883 if (call == MAT_REUSE_MATRIX) { 1884 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1885 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1886 local = &Mreuse; 1887 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1888 } else { 1889 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1890 Mreuse = *local; 1891 ierr = PetscFree(local);CHKERRQ(ierr); 1892 } 1893 1894 /* 1895 m - number of local rows 1896 n - number of columns (same on all processors) 1897 rstart - first row in new global matrix generated 1898 */ 1899 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1900 if (call == MAT_INITIAL_MATRIX) { 1901 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1902 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1903 ii = aij->i; 1904 jj = aij->j; 1905 1906 /* 1907 Determine the number of non-zeros in the diagonal and off-diagonal 1908 portions of the matrix in order to do correct preallocation 1909 */ 1910 1911 /* first get start and end of "diagonal" columns */ 1912 if (csize == PETSC_DECIDE) { 1913 nlocal = n/size + ((n % size) > rank); 1914 } else { 1915 nlocal = csize; 1916 } 1917 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1918 rstart = rend - nlocal; 1919 if (rank == size - 1 && rend != n) { 1920 SETERRQ(1,"Local column sizes do 
not add up to total number of columns"); 1921 } 1922 1923 /* next, compute all the lengths */ 1924 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1925 olens = dlens + m; 1926 for (i=0; i<m; i++) { 1927 jend = ii[i+1] - ii[i]; 1928 olen = 0; 1929 dlen = 0; 1930 for (j=0; j<jend; j++) { 1931 if (*jj < rstart || *jj >= rend) olen++; 1932 else dlen++; 1933 jj++; 1934 } 1935 olens[i] = olen; 1936 dlens[i] = dlen; 1937 } 1938 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1939 ierr = PetscFree(dlens);CHKERRQ(ierr); 1940 } else { 1941 int ml,nl; 1942 1943 M = *newmat; 1944 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1945 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1946 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1947 /* 1948 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1949 rather than the slower MatSetValues(). 1950 */ 1951 M->was_assembled = PETSC_TRUE; 1952 M->assembled = PETSC_FALSE; 1953 } 1954 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1955 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1956 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1957 ii = aij->i; 1958 jj = aij->j; 1959 aa = aij->a; 1960 for (i=0; i<m; i++) { 1961 row = rstart + i; 1962 nz = ii[i+1] - ii[i]; 1963 cwork = jj; jj += nz; 1964 vwork = aa; aa += nz; 1965 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1966 } 1967 1968 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1969 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1970 *newmat = M; 1971 1972 /* save submatrix used in processor for next request */ 1973 if (call == MAT_INITIAL_MATRIX) { 1974 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1975 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1976 } 1977 1978 PetscFunctionReturn(0); 1979 } 1980 1981 
#undef __FUNCT__ 1982 #define __FUNCT__ "MatMPIAIJSetPreallocation" 1983 /*@C 1984 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 1985 (the default parallel PETSc format). For good matrix assembly performance 1986 the user should preallocate the matrix storage by setting the parameters 1987 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1988 performance can be increased by more than a factor of 50. 1989 1990 Collective on MPI_Comm 1991 1992 Input Parameters: 1993 + A - the matrix 1994 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1995 (same value is used for all local rows) 1996 . d_nnz - array containing the number of nonzeros in the various rows of the 1997 DIAGONAL portion of the local submatrix (possibly different for each row) 1998 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1999 The size of this array is equal to the number of local rows, i.e 'm'. 2000 You must leave room for the diagonal entry even if it is zero. 2001 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2002 submatrix (same value is used for all local rows). 2003 - o_nnz - array containing the number of nonzeros in the various rows of the 2004 OFF-DIAGONAL portion of the local submatrix (possibly different for 2005 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2006 structure. The size of this array is equal to the number 2007 of local rows, i.e 'm'. 2008 2009 The AIJ format (also called the Yale sparse matrix format or 2010 compressed row storage), is fully compatible with standard Fortran 77 2011 storage. That is, the stored row and column indices can begin at 2012 either one (as in Fortran) or zero. See the users manual for details. 2013 2014 The user MUST specify either the local or global matrix dimensions 2015 (possibly both). 
2016 2017 The parallel matrix is partitioned such that the first m0 rows belong to 2018 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2019 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2020 2021 The DIAGONAL portion of the local submatrix of a processor can be defined 2022 as the submatrix which is obtained by extraction the part corresponding 2023 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2024 first row that belongs to the processor, and r2 is the last row belonging 2025 to the this processor. This is a square mxm matrix. The remaining portion 2026 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2027 2028 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2029 2030 By default, this format uses inodes (identical nodes) when possible. 2031 We search for consecutive rows with the same nonzero structure, thereby 2032 reusing matrix information to achieve increased efficiency. 2033 2034 Options Database Keys: 2035 + -mat_aij_no_inode - Do not use inodes 2036 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2037 - -mat_aij_oneindex - Internally use indexing starting at 1 2038 rather than 0. Note that when calling MatSetValues(), 2039 the user still MUST index entries starting at 0! 2040 2041 Example usage: 2042 2043 Consider the following 8x8 matrix with 34 non-zero values, that is 2044 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2045 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 2046 as follows: 2047 2048 .vb 2049 1 2 0 | 0 3 0 | 0 4 2050 Proc0 0 5 6 | 7 0 0 | 8 0 2051 9 0 10 | 11 0 0 | 12 0 2052 ------------------------------------- 2053 13 0 14 | 15 16 17 | 0 0 2054 Proc1 0 18 0 | 19 20 21 | 0 0 2055 0 0 0 | 22 23 0 | 24 0 2056 ------------------------------------- 2057 Proc2 25 26 27 | 0 0 28 | 29 0 2058 30 0 0 | 31 32 33 | 0 34 2059 .ve 2060 2061 This can be represented as a collection of submatrices as: 2062 2063 .vb 2064 A B C 2065 D E F 2066 G H I 2067 .ve 2068 2069 Where the submatrices A,B,C are owned by proc0, D,E,F are 2070 owned by proc1, G,H,I are owned by proc2. 2071 2072 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2073 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2074 The 'M','N' parameters are 8,8, and have the same values on all procs. 2075 2076 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2077 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2078 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2079 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2080 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2081 matrix, ans [DF] as another SeqAIJ matrix. 2082 2083 When d_nz, o_nz parameters are specified, d_nz storage elements are 2084 allocated for every row of the local diagonal submatrix, and o_nz 2085 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2086 One way to choose d_nz and o_nz is to use the max nonzerors per local 2087 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2088 In this case, the values of d_nz,o_nz are: 2089 .vb 2090 proc0 : dnz = 2, o_nz = 2 2091 proc1 : dnz = 3, o_nz = 2 2092 proc2 : dnz = 1, o_nz = 4 2093 .ve 2094 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2095 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2096 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 2097 34 values. 2098 2099 When d_nnz, o_nnz parameters are specified, the storage is specified 2100 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2101 In the above case the values for d_nnz,o_nnz are: 2102 .vb 2103 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2104 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2105 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2106 .ve 2107 Here the space allocated is sum of all the above values i.e 34, and 2108 hence pre-allocation is perfect. 2109 2110 Level: intermediate 2111 2112 .keywords: matrix, aij, compressed row, sparse, parallel 2113 2114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2115 @*/ 2116 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2117 { 2118 Mat_MPIAIJ *b; 2119 int ierr,i; 2120 PetscTruth flg2; 2121 2122 PetscFunctionBegin; 2123 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2124 if (!flg2) PetscFunctionReturn(0); 2125 B->preallocated = PETSC_TRUE; 2126 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2127 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2128 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2129 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2130 if (d_nnz) { 2131 for (i=0; i<B->m; i++) { 2132 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2133 } 2134 } 2135 if (o_nnz) { 2136 for (i=0; i<B->m; i++) { 2137 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2138 } 2139 } 2140 b = (Mat_MPIAIJ*)B->data; 2141 2142 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2143 PetscLogObjectParent(B,b->A); 2144 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2145 PetscLogObjectParent(B,b->B); 2146 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatCreateMPIAIJ" 2152 /*@C 2153 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2154 (the default parallel PETSc format). For good matrix assembly performance 2155 the user should preallocate the matrix storage by setting the parameters 2156 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2157 performance can be increased by more than a factor of 50. 2158 2159 Collective on MPI_Comm 2160 2161 Input Parameters: 2162 + comm - MPI communicator 2163 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2164 This value should be the same as the local size used in creating the 2165 y vector for the matrix-vector product y = Ax. 2166 . n - This value should be the same as the local size used in creating the 2167 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2168 calculated if N is given) For square matrices n is almost always m. 2169 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2170 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2171 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2172 (same value is used for all local rows) 2173 . d_nnz - array containing the number of nonzeros in the various rows of the 2174 DIAGONAL portion of the local submatrix (possibly different for each row) 2175 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2176 The size of this array is equal to the number of local rows, i.e 'm'. 2177 You must leave room for the diagonal entry even if it is zero. 2178 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2179 submatrix (same value is used for all local rows). 
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices.  For example proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
2299 2300 Level: intermediate 2301 2302 .keywords: matrix, aij, compressed row, sparse, parallel 2303 2304 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2305 @*/ 2306 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2307 { 2308 int ierr,size; 2309 2310 PetscFunctionBegin; 2311 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2312 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2313 if (size > 1) { 2314 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2315 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2316 } else { 2317 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2318 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2319 } 2320 PetscFunctionReturn(0); 2321 } 2322 2323 #undef __FUNCT__ 2324 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2325 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2326 { 2327 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2328 PetscFunctionBegin; 2329 *Ad = a->A; 2330 *Ao = a->B; 2331 *colmap = a->garray; 2332 PetscFunctionReturn(0); 2333 } 2334 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2337 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2338 { 2339 int ierr; 2340 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2341 2342 PetscFunctionBegin; 2343 if (coloring->ctype == IS_COLORING_LOCAL) { 2344 int *allcolors,*colors,i; 2345 ISColoring ocoloring; 2346 2347 /* set coloring for diagonal portion */ 2348 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2349 2350 /* set coloring for off-diagonal portion */ 2351 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2352 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2353 for (i=0; i<a->B->n; i++) { 2354 colors[i] = allcolors[a->garray[i]]; 2355 } 2356 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2357 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2358 ierr = 
MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2359 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2360 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2361 int *colors,i,*larray; 2362 ISColoring ocoloring; 2363 2364 /* set coloring for diagonal portion */ 2365 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2366 for (i=0; i<a->A->n; i++) { 2367 larray[i] = i + a->cstart; 2368 } 2369 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2370 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2371 for (i=0; i<a->A->n; i++) { 2372 colors[i] = coloring->colors[larray[i]]; 2373 } 2374 ierr = PetscFree(larray);CHKERRQ(ierr); 2375 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2376 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2377 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2378 2379 /* set coloring for off-diagonal portion */ 2380 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2381 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2382 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2383 for (i=0; i<a->B->n; i++) { 2384 colors[i] = coloring->colors[larray[i]]; 2385 } 2386 ierr = PetscFree(larray);CHKERRQ(ierr); 2387 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2388 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2389 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2390 } else { 2391 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2392 } 2393 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2399 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2400 { 2401 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2402 int ierr; 2403 2404 PetscFunctionBegin; 2405 ierr = 
MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2406 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2412 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2413 { 2414 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2415 int ierr; 2416 2417 PetscFunctionBegin; 2418 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2419 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422