1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift=0; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. */ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_SPOOLES) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 422 #if defined(PETSC_HAVE_MUMPS) 423 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 424 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 431 int MatZeroEntries_MPIAIJ(Mat A) 432 { 433 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 434 int ierr; 435 436 PetscFunctionBegin; 437 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 438 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatZeroRows_MPIAIJ" 444 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 445 { 446 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 447 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 448 int *nprocs,j,idx,nsends,row; 449 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 450 int *rvalues,tag = A->tag,count,base,slen,n,*source; 451 int *lens,imdex,*lrows,*values,rstart=l->rstart; 452 MPI_Comm comm = A->comm; 453 MPI_Request *send_waits,*recv_waits; 454 MPI_Status recv_status,*send_status; 455 IS istmp; 456 PetscTruth found; 457 458 PetscFunctionBegin; 459 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 460 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 461 462 /* first count number of contributors to each processor */ 463 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 464 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 465 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 466 for (i=0; i<N; i++) { 467 idx = rows[i]; 468 found = PETSC_FALSE; 469 for (j=0; j<size; j++) { 470 if (idx >= owners[j] && idx < owners[j+1]) { 471 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 472 } 473 } 474 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 475 } 476 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 477 478 /* inform other processors of number of messages and max length*/ 479 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 480 481 /* post receives: */ 482 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 484 for (i=0; i<nrecvs; i++) { 485 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 486 } 487 488 /* do sends: 489 1) starts[i] gives the starting index in svalues for stuff going to 490 the ith processor 491 */ 492 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 493 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 494 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 495 starts[0] = 0; 496 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 497 for (i=0; i<N; i++) { 498 svalues[starts[owner[i]]++] = rows[i]; 499 } 500 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 501 502 starts[0] = 0; 503 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 504 count = 0; 505 for (i=0; i<size; i++) { 506 if (nprocs[2*i+1]) { 507 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 508 } 509 } 510 ierr = PetscFree(starts);CHKERRQ(ierr); 511 512 base = owners[rank]; 513 514 /* wait on receives */ 515 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 516 source = lens + nrecvs; 517 count = nrecvs; slen = 0; 518 while (count) { 519 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 520 /* unpack receives into our local space */ 521 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 522 source[imdex] = recv_status.MPI_SOURCE; 523 lens[imdex] = n; 524 slen += n; 525 count--; 526 } 527 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 528 529 /* move the data into the send scatter */ 530 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 531 count = 0; 532 for (i=0; i<nrecvs; i++) { 533 values = rvalues + i*nmax; 534 for (j=0; j<lens[i]; j++) { 535 lrows[count++] = values[j] - base; 536 } 537 } 538 ierr = PetscFree(rvalues);CHKERRQ(ierr); 539 ierr = PetscFree(lens);CHKERRQ(ierr); 540 ierr = PetscFree(owner);CHKERRQ(ierr); 541 ierr = PetscFree(nprocs);CHKERRQ(ierr); 542 543 /* actually zap the local rows */ 544 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 545 PetscLogObjectParent(A,istmp); 546 547 /* 548 Zero the required rows. If the "diagonal block" of the matrix 549 is square and the user wishes to set the diagonal we use seperate 550 code so that MatSetValues() is not called for each diagonal allocating 551 new memory, thus calling lots of mallocs and slowing things down. 552 553 Contributed by: Mathew Knepley 554 */ 555 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 556 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 557 if (diag && (l->A->M == l->A->N)) { 558 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 559 } else if (diag) { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 562 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 564 } 565 for (i = 0; i < slen; i++) { 566 row = lrows[i] + rstart; 567 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 568 } 569 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 571 } else { 572 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 573 } 574 ierr = ISDestroy(istmp);CHKERRQ(ierr); 575 ierr = PetscFree(lrows);CHKERRQ(ierr); 576 577 /* wait on sends */ 578 if (nsends) { 579 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 580 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 581 ierr = PetscFree(send_status);CHKERRQ(ierr); 582 } 583 ierr = PetscFree(send_waits);CHKERRQ(ierr); 584 ierr = PetscFree(svalues);CHKERRQ(ierr); 585 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMult_MPIAIJ" 591 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 int ierr,nt; 595 596 PetscFunctionBegin; 597 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 598 if (nt != A->n) { 599 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 600 } 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultAdd_MPIAIJ" 610 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 int ierr; 614 615 PetscFunctionBegin; 616 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 617 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 618 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 619 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 625 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 626 { 627 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 628 int ierr; 629 630 PetscFunctionBegin; 631 /* do nondiagonal part */ 632 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 633 /* send it on its way */ 634 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 635 /* do local part */ 636 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 637 /* receive remote parts: note this assumes the values are not actually */ 638 /* inserted in yy until the next line, which is true for my implementation*/ 639 /* but is not perhaps always true. */ 640 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 EXTERN_C_BEGIN 645 #undef __FUNCT__ 646 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 647 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 648 { 649 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 650 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 651 IS Me,Notme; 652 int M,N,first,last,*notme,i, ierr; 653 654 PetscFunctionBegin; 655 656 /* Easy test: symmetric diagonal block */ 657 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 658 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 659 if (!*f) PetscFunctionReturn(0); 660 661 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 662 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 663 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 664 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 665 for (i=0; i<first; i++) notme[i] = i; 666 for (i=last; i<M; i++) notme[i-last+first] = i; 667 ierr = ISCreateGeneral 668 (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 669 ierr = ISCreateStride 670 (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 671 ierr = MatGetSubMatrices 672 (Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 673 Aoff = Aoffs[0]; 674 ierr = MatGetSubMatrices 675 (Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 676 Boff = Boffs[0]; 677 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 678 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 679 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 680 ierr = ISDestroy(Me); CHKERRQ(ierr); 681 ierr = ISDestroy(Notme); CHKERRQ(ierr); 682 683 PetscFunctionReturn(0); 684 } 685 EXTERN_C_END 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 689 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 690 { 691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 692 int ierr; 693 694 PetscFunctionBegin; 695 /* do nondiagonal part */ 696 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 697 /* send it on its way */ 698 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 699 /* do local part */ 700 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 701 /* receive remote parts: note this assumes the values are not actually */ 702 /* inserted in yy until the next line, which is true for my implementation*/ 703 /* but is not perhaps always true. */ 704 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 /* 709 This only works correctly for square matrices where the subblock A->A is the 710 diagonal block 711 */ 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 714 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 715 { 716 int ierr; 717 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 718 719 PetscFunctionBegin; 720 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 721 if (a->rstart != a->cstart || a->rend != a->cend) { 722 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 723 } 724 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatScale_MPIAIJ" 730 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 731 { 732 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 733 int ierr; 734 735 PetscFunctionBegin; 736 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 737 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatDestroy_MPIAIJ" 743 int MatDestroy_MPIAIJ(Mat mat) 744 { 745 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 746 int ierr; 747 748 PetscFunctionBegin; 749 #if defined(PETSC_USE_LOG) 750 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 751 #endif 752 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 753 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 754 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 755 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 756 #if defined (PETSC_USE_CTABLE) 757 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 758 #else 759 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 760 #endif 761 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 762 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 763 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 764 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 765 ierr = PetscFree(aij);CHKERRQ(ierr); 766 PetscFunctionReturn(0); 767 } 768 769 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 770 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 771 extern int MatFactorInfo_MUMPS(Mat,PetscViewer); 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MPIAIJ_Binary" 775 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 776 { 777 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 778 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 779 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 780 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 781 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 782 PetscScalar *column_values; 783 784 PetscFunctionBegin; 785 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 786 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 787 nz = A->nz + B->nz; 788 if (rank == 0) { 789 header[0] = MAT_FILE_COOKIE; 790 header[1] = mat->M; 791 header[2] = mat->N; 792 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 793 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 794 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 795 /* get largest number of rows any processor has */ 796 rlen = mat->m; 797 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 798 for (i=1; i<size; i++) { 799 rlen = PetscMax(rlen,range[i+1] - range[i]); 800 } 801 } else { 802 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 803 rlen = mat->m; 804 } 805 806 /* load up the local row counts */ 807 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 808 for (i=0; i<mat->m; i++) { 809 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 810 } 811 812 /* store the row lengths to the file */ 813 if (rank == 0) { 814 MPI_Status status; 815 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 816 for (i=1; i<size; i++) { 817 rlen = range[i+1] - range[i]; 818 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 819 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 820 } 821 } else { 822 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 823 } 824 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 825 826 /* load up the local column indices */ 827 nzmax = nz; /* )th processor needs space a largest processor needs */ 828 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 829 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 830 cnt = 0; 831 for (i=0; i<mat->m; i++) { 832 for (j=B->i[i]; j<B->i[i+1]; j++) { 833 if ( (col = garray[B->j[j]]) > cstart) break; 834 column_indices[cnt++] = col; 835 } 836 for (k=A->i[i]; k<A->i[i+1]; k++) { 837 column_indices[cnt++] = A->j[k] + cstart; 838 } 839 for (; j<B->i[i+1]; j++) { 840 column_indices[cnt++] = garray[B->j[j]]; 841 } 842 } 843 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 844 845 /* store the column indices to the file */ 846 if (rank == 0) { 847 MPI_Status status; 848 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 849 for (i=1; i<size; i++) { 850 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 851 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 852 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 854 } 855 } else { 856 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 857 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 858 } 859 ierr = PetscFree(column_indices);CHKERRQ(ierr); 860 861 /* load up the local column values */ 862 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 863 cnt = 0; 864 for (i=0; i<mat->m; i++) { 865 for (j=B->i[i]; j<B->i[i+1]; j++) { 866 if ( garray[B->j[j]] > cstart) break; 867 column_values[cnt++] = B->a[j]; 868 } 869 for (k=A->i[i]; k<A->i[i+1]; k++) { 870 column_values[cnt++] = A->a[k]; 871 } 872 for (; j<B->i[i+1]; j++) { 873 column_values[cnt++] = B->a[j]; 874 } 875 } 876 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 877 878 /* store the column values to the file */ 879 if (rank == 0) { 880 MPI_Status status; 881 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 882 for (i=1; i<size; i++) { 883 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 884 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 885 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 886 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 887 } 888 } else { 889 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 890 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 891 } 892 ierr = PetscFree(column_values);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 #undef __FUNCT__ 897 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 898 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 899 { 900 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 901 int ierr,rank = aij->rank,size = aij->size; 902 PetscTruth isdraw,isascii,flg,isbinary; 903 PetscViewer sviewer; 904 PetscViewerFormat format; 905 906 PetscFunctionBegin; 907 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 909 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 910 if (isascii) { 911 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 912 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 913 MatInfo info; 914 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 915 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 916 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 917 if (flg) { 918 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 919 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 920 } else { 921 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 922 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 923 } 924 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 925 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 926 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 927 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 928 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 929 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } else if (format == PETSC_VIEWER_ASCII_INFO) { 932 PetscFunctionReturn(0); 933 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 934 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 935 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 936 #endif 937 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 938 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 939 #endif 940 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE) 941 ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr); 942 #endif 943 PetscFunctionReturn(0); 944 } 945 } else if (isbinary) { 946 if (size == 1) { 947 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 948 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 949 } else { 950 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 951 } 952 PetscFunctionReturn(0); 953 } else if (isdraw) { 954 PetscDraw draw; 955 PetscTruth isnull; 956 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 957 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 958 } 959 960 if (size == 1) { 961 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 962 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 963 } else { 964 /* assemble the entire matrix onto first processor. */ 965 Mat A; 966 Mat_SeqAIJ *Aloc; 967 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 968 PetscScalar *a; 969 970 if (!rank) { 971 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 972 } else { 973 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 974 } 975 PetscLogObjectParent(mat,A); 976 977 /* copy over the A part */ 978 Aloc = (Mat_SeqAIJ*)aij->A->data; 979 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 980 row = aij->rstart; 981 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 982 for (i=0; i<m; i++) { 983 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 984 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 985 } 986 aj = Aloc->j; 987 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 988 989 /* copy over the B part */ 990 Aloc = (Mat_SeqAIJ*)aij->B->data; 991 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 992 row = aij->rstart; 993 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 994 ct = cols; 995 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 996 for (i=0; i<m; i++) { 997 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 998 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 999 } 1000 ierr = PetscFree(ct);CHKERRQ(ierr); 1001 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1002 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 /* 1004 Everyone has to call to draw the matrix since the graphics waits are 1005 synchronized across all processors that share the PetscDraw object 1006 */ 1007 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1008 if (!rank) { 1009 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1010 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1011 } 1012 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1013 ierr = MatDestroy(A);CHKERRQ(ierr); 1014 } 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatView_MPIAIJ" 1020 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1021 { 1022 int ierr; 1023 PetscTruth isascii,isdraw,issocket,isbinary; 1024 1025 PetscFunctionBegin; 1026 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1027 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1028 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1029 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1030 if (isascii || isdraw || isbinary || issocket) { 1031 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1032 } else { 1033 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1034 } 1035 PetscFunctionReturn(0); 1036 } 1037 1038 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatRelax_MPIAIJ" 1042 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1043 { 1044 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1045 int ierr; 1046 Vec bb1; 1047 PetscScalar mone=-1.0; 1048 1049 PetscFunctionBegin; 1050 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1051 1052 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1053 1054 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1055 if (flag & SOR_ZERO_INITIAL_GUESS) { 1056 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1057 its--; 1058 } 1059 1060 while (its--) { 1061 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1063 1064 /* update rhs: bb1 = bb - B*x */ 1065 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1066 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1067 1068 /* local sweep */ 1069 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1070 CHKERRQ(ierr); 1071 } 1072 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1073 if (flag & SOR_ZERO_INITIAL_GUESS) { 1074 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1075 its--; 1076 } 1077 while (its--) { 1078 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1079 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1080 1081 /* update rhs: bb1 = bb - B*x */ 1082 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1083 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1084 1085 /* local sweep */ 1086 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1087 CHKERRQ(ierr); 1088 } 1089 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1090 if (flag & SOR_ZERO_INITIAL_GUESS) { 1091 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1092 its--; 1093 } 1094 while (its--) { 1095 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1096 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1097 1098 /* update rhs: bb1 = bb - B*x */ 1099 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1100 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1101 1102 /* local sweep */ 1103 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1104 CHKERRQ(ierr); 1105 } 1106 } else { 1107 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1108 } 1109 1110 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1116 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1117 { 1118 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1119 Mat A = mat->A,B = mat->B; 1120 int ierr; 1121 PetscReal isend[5],irecv[5]; 1122 1123 PetscFunctionBegin; 1124 info->block_size = 1.0; 1125 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1126 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1127 isend[3] = info->memory; isend[4] = info->mallocs; 1128 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1129 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1130 isend[3] += info->memory; isend[4] += info->mallocs; 1131 if (flag == MAT_LOCAL) { 1132 info->nz_used = isend[0]; 1133 info->nz_allocated = isend[1]; 1134 info->nz_unneeded = isend[2]; 1135 info->memory = isend[3]; 1136 info->mallocs = isend[4]; 1137 } else if (flag == MAT_GLOBAL_MAX) { 1138 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1139 info->nz_used = irecv[0]; 1140 info->nz_allocated = irecv[1]; 1141 info->nz_unneeded = irecv[2]; 1142 info->memory = irecv[3]; 1143 info->mallocs = irecv[4]; 1144 } else if (flag == MAT_GLOBAL_SUM) { 1145 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1146 info->nz_used = irecv[0]; 1147 info->nz_allocated = irecv[1]; 1148 info->nz_unneeded = irecv[2]; 1149 info->memory = irecv[3]; 1150 info->mallocs = irecv[4]; 1151 } 1152 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1153 info->fill_ratio_needed = 0; 1154 info->factor_mallocs = 0; 1155 info->rows_global = (double)matin->M; 1156 info->columns_global = (double)matin->N; 1157 info->rows_local = (double)matin->m; 1158 info->columns_local = (double)matin->N; 1159 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatSetOption_MPIAIJ" 1165 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1166 { 1167 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1168 int ierr; 1169 1170 PetscFunctionBegin; 1171 switch (op) { 1172 case MAT_NO_NEW_NONZERO_LOCATIONS: 1173 case MAT_YES_NEW_NONZERO_LOCATIONS: 1174 case MAT_COLUMNS_UNSORTED: 1175 case MAT_COLUMNS_SORTED: 1176 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1177 case MAT_KEEP_ZEROED_ROWS: 1178 case MAT_NEW_NONZERO_LOCATION_ERR: 1179 case MAT_USE_INODES: 1180 case MAT_DO_NOT_USE_INODES: 1181 case MAT_IGNORE_ZERO_ENTRIES: 1182 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1183 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1184 break; 1185 case MAT_ROW_ORIENTED: 1186 a->roworiented = PETSC_TRUE; 1187 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1188 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1189 break; 1190 case MAT_ROWS_SORTED: 1191 case MAT_ROWS_UNSORTED: 1192 case MAT_YES_NEW_DIAGONALS: 1193 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1194 break; 1195 case MAT_COLUMN_ORIENTED: 1196 a->roworiented = PETSC_FALSE; 1197 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1198 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1199 break; 1200 case MAT_IGNORE_OFF_PROC_ENTRIES: 1201 a->donotstash = PETSC_TRUE; 1202 break; 1203 case MAT_NO_NEW_DIAGONALS: 1204 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1205 default: 1206 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatGetRow_MPIAIJ" 1213 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1214 { 1215 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1216 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1217 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1218 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1219 int *cmap,*idx_p; 1220 1221 PetscFunctionBegin; 1222 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1223 mat->getrowactive = PETSC_TRUE; 1224 1225 if (!mat->rowvalues && (idx || v)) { 1226 /* 1227 allocate enough space to hold information from the longest row. 1228 */ 1229 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1230 int max = 1,tmp; 1231 for (i=0; i<matin->m; i++) { 1232 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1233 if (max < tmp) { max = tmp; } 1234 } 1235 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1236 mat->rowindices = (int*)(mat->rowvalues + max); 1237 } 1238 1239 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1240 lrow = row - rstart; 1241 1242 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1243 if (!v) {pvA = 0; pvB = 0;} 1244 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1245 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1246 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1247 nztot = nzA + nzB; 1248 1249 cmap = mat->garray; 1250 if (v || idx) { 1251 if (nztot) { 1252 /* Sort by increasing column numbers, assuming A and B already sorted */ 1253 int imark = -1; 1254 if (v) { 1255 *v = v_p = mat->rowvalues; 1256 for (i=0; i<nzB; i++) { 1257 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1258 else break; 1259 } 1260 imark = i; 1261 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1262 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1263 } 1264 if (idx) { 1265 *idx = idx_p = mat->rowindices; 1266 if (imark > -1) { 1267 for (i=0; i<imark; i++) { 1268 idx_p[i] = cmap[cworkB[i]]; 1269 } 1270 } else { 1271 for (i=0; i<nzB; i++) { 1272 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1273 else break; 1274 } 1275 imark = i; 1276 } 1277 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1278 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1279 } 1280 } else { 1281 if (idx) *idx = 0; 1282 if (v) *v = 0; 1283 } 1284 } 1285 *nz = nztot; 1286 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1287 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1293 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1294 { 1295 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1296 1297 PetscFunctionBegin; 1298 if (aij->getrowactive == PETSC_FALSE) { 1299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1300 } 1301 aij->getrowactive = PETSC_FALSE; 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatNorm_MPIAIJ" 1307 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1308 { 1309 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1310 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1311 int ierr,i,j,cstart = aij->cstart; 1312 PetscReal sum = 0.0; 1313 PetscScalar *v; 1314 1315 PetscFunctionBegin; 1316 if (aij->size == 1) { 1317 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1318 } else { 1319 if (type == NORM_FROBENIUS) { 1320 v = amat->a; 1321 for (i=0; i<amat->nz; i++) { 1322 #if defined(PETSC_USE_COMPLEX) 1323 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1324 #else 1325 sum += (*v)*(*v); v++; 1326 #endif 1327 } 1328 v = bmat->a; 1329 for (i=0; i<bmat->nz; i++) { 1330 #if defined(PETSC_USE_COMPLEX) 1331 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1332 #else 1333 sum += (*v)*(*v); v++; 1334 #endif 1335 } 1336 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1337 *norm = sqrt(*norm); 1338 } else if (type == NORM_1) { /* max column norm */ 1339 PetscReal *tmp,*tmp2; 1340 int *jj,*garray = aij->garray; 1341 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1342 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1343 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1344 *norm = 0.0; 1345 v = amat->a; jj = amat->j; 1346 for (j=0; j<amat->nz; j++) { 1347 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1348 } 1349 v = bmat->a; jj = bmat->j; 1350 for (j=0; j<bmat->nz; j++) { 1351 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1352 } 1353 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1354 for (j=0; j<mat->N; j++) { 1355 if (tmp2[j] > *norm) *norm = tmp2[j]; 1356 } 1357 ierr = PetscFree(tmp);CHKERRQ(ierr); 1358 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1359 } else if (type == NORM_INFINITY) { /* max row norm */ 1360 PetscReal ntemp = 0.0; 1361 for (j=0; j<aij->A->m; j++) { 1362 v = amat->a + amat->i[j]; 1363 sum = 0.0; 1364 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1365 sum += PetscAbsScalar(*v); v++; 1366 } 1367 v = bmat->a + bmat->i[j]; 1368 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1369 sum += PetscAbsScalar(*v); v++; 1370 } 1371 if (sum > ntemp) ntemp = sum; 1372 } 1373 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1374 } else { 1375 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1376 } 1377 } 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatTranspose_MPIAIJ" 1383 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1384 { 1385 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1386 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1387 int ierr; 1388 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1389 Mat B; 1390 PetscScalar *array; 1391 1392 PetscFunctionBegin; 1393 if (!matout && M != N) { 1394 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1395 } 1396 1397 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1398 1399 /* copy over the A part */ 1400 Aloc = (Mat_SeqAIJ*)a->A->data; 1401 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1402 row = a->rstart; 1403 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1407 } 1408 aj = Aloc->j; 1409 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1410 1411 /* copy over the B part */ 1412 Aloc = (Mat_SeqAIJ*)a->B->data; 1413 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1414 row = a->rstart; 1415 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1416 ct = cols; 1417 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1418 for (i=0; i<m; i++) { 1419 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1420 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1421 } 1422 ierr = PetscFree(ct);CHKERRQ(ierr); 1423 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1424 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1425 if (matout) { 1426 *matout = B; 1427 } else { 1428 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1435 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1436 { 1437 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1438 Mat a = aij->A,b = aij->B; 1439 int ierr,s1,s2,s3; 1440 1441 PetscFunctionBegin; 1442 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1443 if (rr) { 1444 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1445 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1446 /* Overlap communication with computation. */ 1447 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1448 } 1449 if (ll) { 1450 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1451 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1452 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1453 } 1454 /* scale the diagonal block */ 1455 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1456 1457 if (rr) { 1458 /* Do a scatter end and then right scale the off-diagonal block */ 1459 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1460 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1461 } 1462 1463 PetscFunctionReturn(0); 1464 } 1465 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1469 int MatPrintHelp_MPIAIJ(Mat A) 1470 { 1471 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1472 int ierr; 1473 1474 PetscFunctionBegin; 1475 if (!a->rank) { 1476 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1483 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1484 { 1485 PetscFunctionBegin; 1486 *bs = 1; 1487 PetscFunctionReturn(0); 1488 } 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1491 int MatSetUnfactored_MPIAIJ(Mat A) 1492 { 1493 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1494 int ierr; 1495 1496 PetscFunctionBegin; 1497 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatEqual_MPIAIJ" 1503 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1504 { 1505 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1506 Mat a,b,c,d; 1507 PetscTruth flg; 1508 int ierr; 1509 1510 PetscFunctionBegin; 1511 a = matA->A; b = matA->B; 1512 c = matB->A; d = matB->B; 1513 1514 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1515 if (flg == PETSC_TRUE) { 1516 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1517 } 1518 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatCopy_MPIAIJ" 1524 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1525 { 1526 int ierr; 1527 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1528 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1529 1530 PetscFunctionBegin; 1531 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1532 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1533 /* because of the column compression in the off-processor part of the matrix a->B, 1534 the number of columns in a->B and b->B may be different, hence we cannot call 1535 the MatCopy() directly on the two parts. If need be, we can provide a more 1536 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1537 then copying the submatrices */ 1538 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1539 } else { 1540 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1541 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1548 int MatSetUpPreallocation_MPIAIJ(Mat A) 1549 { 1550 int ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1554 PetscFunctionReturn(0); 1555 } 1556 1557 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1558 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1559 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1560 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1561 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1562 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1563 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1564 #endif 1565 1566 #include "petscblaslapack.h" 1567 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatAXPY_MPIAIJ" 1570 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1571 { 1572 int ierr,one=1,i; 1573 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1574 Mat_SeqAIJ *x,*y; 1575 1576 PetscFunctionBegin; 1577 if (str == SAME_NONZERO_PATTERN) { 1578 x = (Mat_SeqAIJ *)xx->A->data; 1579 y = (Mat_SeqAIJ *)yy->A->data; 1580 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1581 x = (Mat_SeqAIJ *)xx->B->data; 1582 y = (Mat_SeqAIJ *)yy->B->data; 1583 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1584 } else if (str == SUBSET_NONZERO_PATTERN) { 1585 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1586 1587 x = (Mat_SeqAIJ *)xx->B->data; 1588 y = (Mat_SeqAIJ *)yy->B->data; 1589 if (y->xtoy && y->XtoY != xx->B) { 1590 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1591 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1592 } 1593 if (!y->xtoy) { /* get xtoy */ 1594 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1595 y->XtoY = xx->B; 1596 } 1597 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1598 } else { 1599 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* -------------------------------------------------------------------*/ 1605 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1606 MatGetRow_MPIAIJ, 1607 MatRestoreRow_MPIAIJ, 1608 MatMult_MPIAIJ, 1609 MatMultAdd_MPIAIJ, 1610 MatMultTranspose_MPIAIJ, 1611 MatMultTransposeAdd_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 0, 1616 0, 1617 0, 1618 MatRelax_MPIAIJ, 1619 MatTranspose_MPIAIJ, 1620 MatGetInfo_MPIAIJ, 1621 MatEqual_MPIAIJ, 1622 MatGetDiagonal_MPIAIJ, 1623 MatDiagonalScale_MPIAIJ, 1624 MatNorm_MPIAIJ, 1625 MatAssemblyBegin_MPIAIJ, 1626 MatAssemblyEnd_MPIAIJ, 1627 0, 1628 MatSetOption_MPIAIJ, 1629 MatZeroEntries_MPIAIJ, 1630 MatZeroRows_MPIAIJ, 1631 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1632 MatLUFactorSymbolic_MPIAIJ_TFS, 1633 #else 1634 0, 1635 #endif 1636 0, 1637 0, 1638 0, 1639 MatSetUpPreallocation_MPIAIJ, 1640 0, 1641 0, 1642 0, 1643 0, 1644 MatDuplicate_MPIAIJ, 1645 0, 1646 0, 1647 0, 1648 0, 1649 MatAXPY_MPIAIJ, 1650 MatGetSubMatrices_MPIAIJ, 1651 MatIncreaseOverlap_MPIAIJ, 1652 MatGetValues_MPIAIJ, 1653 MatCopy_MPIAIJ, 1654 MatPrintHelp_MPIAIJ, 1655 MatScale_MPIAIJ, 1656 0, 1657 0, 1658 0, 1659 MatGetBlockSize_MPIAIJ, 1660 0, 1661 0, 1662 0, 1663 0, 1664 MatFDColoringCreate_MPIAIJ, 1665 0, 1666 MatSetUnfactored_MPIAIJ, 1667 0, 1668 0, 1669 MatGetSubMatrix_MPIAIJ, 1670 MatDestroy_MPIAIJ, 1671 MatView_MPIAIJ, 1672 MatGetPetscMaps_Petsc, 1673 0, 1674 0, 1675 0, 1676 0, 1677 0, 1678 0, 1679 0, 1680 0, 1681 MatSetColoring_MPIAIJ, 1682 MatSetValuesAdic_MPIAIJ, 1683 MatSetValuesAdifor_MPIAIJ 1684 }; 1685 1686 /* ----------------------------------------------------------------------------------------*/ 1687 1688 EXTERN_C_BEGIN 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1691 int MatStoreValues_MPIAIJ(Mat mat) 1692 { 1693 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1694 int ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1698 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 EXTERN_C_END 1702 1703 EXTERN_C_BEGIN 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1706 int MatRetrieveValues_MPIAIJ(Mat mat) 1707 { 1708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1709 int ierr; 1710 1711 PetscFunctionBegin; 1712 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1713 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 EXTERN_C_END 1717 1718 #include "petscpc.h" 1719 EXTERN_C_BEGIN 1720 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1721 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec); 1722 EXTERN_C_END 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1727 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1728 { 1729 Mat_MPIAIJ *b; 1730 int ierr,i; 1731 1732 PetscFunctionBegin; 1733 B->preallocated = PETSC_TRUE; 1734 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1735 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1736 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1737 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1738 if (d_nnz) { 1739 for (i=0; i<B->m; i++) { 1740 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1741 } 1742 } 1743 if (o_nnz) { 1744 for (i=0; i<B->m; i++) { 1745 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1746 } 1747 } 1748 b = (Mat_MPIAIJ*)B->data; 1749 1750 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1751 PetscLogObjectParent(B,b->A); 1752 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1753 PetscLogObjectParent(B,b->B); 1754 1755 PetscFunctionReturn(0); 1756 } 1757 EXTERN_C_END 1758 1759 EXTERN_C_BEGIN 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatCreate_MPIAIJ" 1762 int MatCreate_MPIAIJ(Mat B) 1763 { 1764 Mat_MPIAIJ *b; 1765 int ierr,i,size; 1766 1767 PetscFunctionBegin; 1768 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1769 1770 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1771 B->data = (void*)b; 1772 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1773 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1774 B->factor = 0; 1775 B->assembled = PETSC_FALSE; 1776 B->mapping = 0; 1777 1778 B->insertmode = NOT_SET_VALUES; 1779 b->size = size; 1780 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1781 1782 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1783 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1784 1785 /* the information in the maps duplicates the information computed below, eventually 1786 we should remove the duplicate information that is not contained in the maps */ 1787 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1788 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1789 1790 /* build local table of row and column ownerships */ 1791 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1792 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1793 b->cowners = b->rowners + b->size + 2; 1794 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1795 b->rowners[0] = 0; 1796 for (i=2; i<=b->size; i++) { 1797 b->rowners[i] += b->rowners[i-1]; 1798 } 1799 b->rstart = b->rowners[b->rank]; 1800 b->rend = b->rowners[b->rank+1]; 1801 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1802 b->cowners[0] = 0; 1803 for (i=2; i<=b->size; i++) { 1804 b->cowners[i] += b->cowners[i-1]; 1805 } 1806 b->cstart = b->cowners[b->rank]; 1807 b->cend = b->cowners[b->rank+1]; 1808 1809 /* build cache for off array entries formed */ 1810 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1811 b->donotstash = PETSC_FALSE; 1812 b->colmap = 0; 1813 b->garray = 0; 1814 b->roworiented = PETSC_TRUE; 1815 1816 /* stuff used for matrix vector multiply */ 1817 b->lvec = PETSC_NULL; 1818 b->Mvctx = PETSC_NULL; 1819 1820 /* stuff for MatGetRow() */ 1821 b->rowindices = 0; 1822 b->rowvalues = 0; 1823 b->getrowactive = PETSC_FALSE; 1824 1825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1826 "MatStoreValues_MPIAIJ", 1827 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1829 "MatRetrieveValues_MPIAIJ", 1830 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1832 "MatGetDiagonalBlock_MPIAIJ", 1833 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1835 "MatIsSymmetric_MPIAIJ", 1836 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1838 "MatMPIAIJSetPreallocation_MPIAIJ", 1839 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1841 "MatDiagonalScaleLocal_MPIAIJ", 1842 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 EXTERN_C_END 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1849 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1850 { 1851 Mat mat; 1852 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1853 int ierr; 1854 1855 PetscFunctionBegin; 1856 *newmat = 0; 1857 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1858 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1859 a = (Mat_MPIAIJ*)mat->data; 1860 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1861 mat->factor = matin->factor; 1862 mat->assembled = PETSC_TRUE; 1863 mat->insertmode = NOT_SET_VALUES; 1864 mat->preallocated = PETSC_TRUE; 1865 1866 a->rstart = oldmat->rstart; 1867 a->rend = oldmat->rend; 1868 a->cstart = oldmat->cstart; 1869 a->cend = oldmat->cend; 1870 a->size = oldmat->size; 1871 a->rank = oldmat->rank; 1872 a->donotstash = oldmat->donotstash; 1873 a->roworiented = oldmat->roworiented; 1874 a->rowindices = 0; 1875 a->rowvalues = 0; 1876 a->getrowactive = PETSC_FALSE; 1877 1878 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1879 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1880 if (oldmat->colmap) { 1881 #if defined (PETSC_USE_CTABLE) 1882 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1883 #else 1884 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1885 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1886 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1887 #endif 1888 } else a->colmap = 0; 1889 if (oldmat->garray) { 1890 int len; 1891 len = oldmat->B->n; 1892 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1893 PetscLogObjectMemory(mat,len*sizeof(int)); 1894 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1895 } else a->garray = 0; 1896 1897 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1898 PetscLogObjectParent(mat,a->lvec); 1899 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1900 PetscLogObjectParent(mat,a->Mvctx); 1901 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1902 PetscLogObjectParent(mat,a->A); 1903 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1904 PetscLogObjectParent(mat,a->B); 1905 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1906 *newmat = mat; 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #include "petscsys.h" 1911 1912 EXTERN_C_BEGIN 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatLoad_MPIAIJ" 1915 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1916 { 1917 Mat A; 1918 PetscScalar *vals,*svals; 1919 MPI_Comm comm = ((PetscObject)viewer)->comm; 1920 MPI_Status status; 1921 int i,nz,ierr,j,rstart,rend,fd; 1922 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1923 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1924 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1925 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS) 1926 PetscTruth flag; 1927 #endif 1928 1929 PetscFunctionBegin; 1930 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1931 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1932 if (!rank) { 1933 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1934 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1935 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1936 if (header[3] < 0) { 1937 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1938 } 1939 } 1940 1941 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1942 M = header[1]; N = header[2]; 1943 /* determine ownership of all rows */ 1944 m = M/size + ((M % size) > rank); 1945 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1946 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1947 rowners[0] = 0; 1948 for (i=2; i<=size; i++) { 1949 rowners[i] += rowners[i-1]; 1950 } 1951 rstart = rowners[rank]; 1952 rend = rowners[rank+1]; 1953 1954 /* distribute row lengths to all processors */ 1955 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1956 offlens = ourlens + (rend-rstart); 1957 if (!rank) { 1958 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1959 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1960 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1961 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1962 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1963 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1964 } else { 1965 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1966 } 1967 1968 if (!rank) { 1969 /* calculate the number of nonzeros on each processor */ 1970 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1971 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1972 for (i=0; i<size; i++) { 1973 for (j=rowners[i]; j< rowners[i+1]; j++) { 1974 procsnz[i] += rowlengths[j]; 1975 } 1976 } 1977 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1978 1979 /* determine max buffer needed and allocate it */ 1980 maxnz = 0; 1981 for (i=0; i<size; i++) { 1982 maxnz = PetscMax(maxnz,procsnz[i]); 1983 } 1984 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1985 1986 /* read in my part of the matrix column indices */ 1987 nz = procsnz[0]; 1988 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1989 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1990 1991 /* read in every one elses and ship off */ 1992 for (i=1; i<size; i++) { 1993 nz = procsnz[i]; 1994 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1995 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1996 } 1997 ierr = PetscFree(cols);CHKERRQ(ierr); 1998 } else { 1999 /* determine buffer space needed for message */ 2000 nz = 0; 2001 for (i=0; i<m; i++) { 2002 nz += ourlens[i]; 2003 } 2004 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2005 2006 /* receive message of column indices*/ 2007 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2008 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2009 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2010 } 2011 2012 /* determine column ownership if matrix is not square */ 2013 if (N != M) { 2014 n = N/size + ((N % size) > rank); 2015 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2016 cstart = cend - n; 2017 } else { 2018 cstart = rstart; 2019 cend = rend; 2020 n = cend - cstart; 2021 } 2022 2023 /* loop over local rows, determining number of off diagonal entries */ 2024 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2025 jj = 0; 2026 for (i=0; i<m; i++) { 2027 for (j=0; j<ourlens[i]; j++) { 2028 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2029 jj++; 2030 } 2031 } 2032 2033 /* create our matrix */ 2034 for (i=0; i<m; i++) { 2035 ourlens[i] -= offlens[i]; 2036 } 2037 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2038 A = *newmat; 2039 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2040 for (i=0; i<m; i++) { 2041 ourlens[i] += offlens[i]; 2042 } 2043 2044 if (!rank) { 2045 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2046 2047 /* read in my part of the matrix numerical values */ 2048 nz = procsnz[0]; 2049 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2050 2051 /* insert into matrix */ 2052 jj = rstart; 2053 smycols = mycols; 2054 svals = vals; 2055 for (i=0; i<m; i++) { 2056 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2057 smycols += ourlens[i]; 2058 svals += ourlens[i]; 2059 jj++; 2060 } 2061 2062 /* read in other processors and ship out */ 2063 for (i=1; i<size; i++) { 2064 nz = procsnz[i]; 2065 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2066 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2067 } 2068 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2069 } else { 2070 /* receive numeric values */ 2071 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2072 2073 /* receive message of values*/ 2074 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2075 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2076 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2077 2078 /* insert into matrix */ 2079 jj = rstart; 2080 smycols = mycols; 2081 svals = vals; 2082 for (i=0; i<m; i++) { 2083 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2084 smycols += ourlens[i]; 2085 svals += ourlens[i]; 2086 jj++; 2087 } 2088 } 2089 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2090 ierr = PetscFree(vals);CHKERRQ(ierr); 2091 ierr = PetscFree(mycols);CHKERRQ(ierr); 2092 ierr = PetscFree(rowners);CHKERRQ(ierr); 2093 2094 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 #if defined(PETSC_HAVE_SPOOLES) 2097 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2098 if (flag) { 2099 if (size == 1) { 2100 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2101 } else { 2102 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2103 } 2104 } 2105 #endif 2106 #if defined(PETSC_HAVE_SUPERLUDIST) 2107 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2108 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2109 #endif 2110 #if defined(PETSC_HAVE_MUMPS) 2111 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 2112 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 2113 #endif 2114 PetscFunctionReturn(0); 2115 } 2116 EXTERN_C_END 2117 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2120 /* 2121 Not great since it makes two copies of the submatrix, first an SeqAIJ 2122 in local and then by concatenating the local matrices the end result. 2123 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2124 */ 2125 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2126 { 2127 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2128 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2129 Mat *local,M,Mreuse; 2130 PetscScalar *vwork,*aa; 2131 MPI_Comm comm = mat->comm; 2132 Mat_SeqAIJ *aij; 2133 2134 2135 PetscFunctionBegin; 2136 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2137 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2138 2139 if (call == MAT_REUSE_MATRIX) { 2140 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2141 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2142 local = &Mreuse; 2143 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2144 } else { 2145 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2146 Mreuse = *local; 2147 ierr = PetscFree(local);CHKERRQ(ierr); 2148 } 2149 2150 /* 2151 m - number of local rows 2152 n - number of columns (same on all processors) 2153 rstart - first row in new global matrix generated 2154 */ 2155 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2156 if (call == MAT_INITIAL_MATRIX) { 2157 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2158 ii = aij->i; 2159 jj = aij->j; 2160 2161 /* 2162 Determine the number of non-zeros in the diagonal and off-diagonal 2163 portions of the matrix in order to do correct preallocation 2164 */ 2165 2166 /* first get start and end of "diagonal" columns */ 2167 if (csize == PETSC_DECIDE) { 2168 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2169 if (mglobal == n) { /* square matrix */ 2170 nlocal = m; 2171 } else { 2172 nlocal = n/size + ((n % size) > rank); 2173 } 2174 } else { 2175 nlocal = csize; 2176 } 2177 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2178 rstart = rend - nlocal; 2179 if (rank == size - 1 && rend != n) { 2180 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2181 } 2182 2183 /* next, compute all the lengths */ 2184 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2185 olens = dlens + m; 2186 for (i=0; i<m; i++) { 2187 jend = ii[i+1] - ii[i]; 2188 olen = 0; 2189 dlen = 0; 2190 for (j=0; j<jend; j++) { 2191 if (*jj < rstart || *jj >= rend) olen++; 2192 else dlen++; 2193 jj++; 2194 } 2195 olens[i] = olen; 2196 dlens[i] = dlen; 2197 } 2198 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2199 ierr = PetscFree(dlens);CHKERRQ(ierr); 2200 } else { 2201 int ml,nl; 2202 2203 M = *newmat; 2204 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2205 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2206 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2207 /* 2208 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2209 rather than the slower MatSetValues(). 2210 */ 2211 M->was_assembled = PETSC_TRUE; 2212 M->assembled = PETSC_FALSE; 2213 } 2214 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2215 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2216 ii = aij->i; 2217 jj = aij->j; 2218 aa = aij->a; 2219 for (i=0; i<m; i++) { 2220 row = rstart + i; 2221 nz = ii[i+1] - ii[i]; 2222 cwork = jj; jj += nz; 2223 vwork = aa; aa += nz; 2224 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2225 } 2226 2227 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2228 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2229 *newmat = M; 2230 2231 /* save submatrix used in processor for next request */ 2232 if (call == MAT_INITIAL_MATRIX) { 2233 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2234 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2235 } 2236 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2242 /*@C 2243 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2244 (the default parallel PETSc format). For good matrix assembly performance 2245 the user should preallocate the matrix storage by setting the parameters 2246 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2247 performance can be increased by more than a factor of 50. 2248 2249 Collective on MPI_Comm 2250 2251 Input Parameters: 2252 + A - the matrix 2253 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2254 (same value is used for all local rows) 2255 . d_nnz - array containing the number of nonzeros in the various rows of the 2256 DIAGONAL portion of the local submatrix (possibly different for each row) 2257 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2258 The size of this array is equal to the number of local rows, i.e 'm'. 2259 You must leave room for the diagonal entry even if it is zero. 2260 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2261 submatrix (same value is used for all local rows). 2262 - o_nnz - array containing the number of nonzeros in the various rows of the 2263 OFF-DIAGONAL portion of the local submatrix (possibly different for 2264 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2265 structure. The size of this array is equal to the number 2266 of local rows, i.e 'm'. 2267 2268 The AIJ format (also called the Yale sparse matrix format or 2269 compressed row storage), is fully compatible with standard Fortran 77 2270 storage. That is, the stored row and column indices can begin at 2271 either one (as in Fortran) or zero. See the users manual for details. 2272 2273 The user MUST specify either the local or global matrix dimensions 2274 (possibly both). 2275 2276 The parallel matrix is partitioned such that the first m0 rows belong to 2277 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2278 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2279 2280 The DIAGONAL portion of the local submatrix of a processor can be defined 2281 as the submatrix which is obtained by extraction the part corresponding 2282 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2283 first row that belongs to the processor, and r2 is the last row belonging 2284 to the this processor. This is a square mxm matrix. The remaining portion 2285 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2286 2287 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2288 2289 By default, this format uses inodes (identical nodes) when possible. 2290 We search for consecutive rows with the same nonzero structure, thereby 2291 reusing matrix information to achieve increased efficiency. 2292 2293 Options Database Keys: 2294 + -mat_aij_no_inode - Do not use inodes 2295 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2296 - -mat_aij_oneindex - Internally use indexing starting at 1 2297 rather than 0. Note that when calling MatSetValues(), 2298 the user still MUST index entries starting at 0! 2299 2300 Example usage: 2301 2302 Consider the following 8x8 matrix with 34 non-zero values, that is 2303 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2304 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2305 as follows: 2306 2307 .vb 2308 1 2 0 | 0 3 0 | 0 4 2309 Proc0 0 5 6 | 7 0 0 | 8 0 2310 9 0 10 | 11 0 0 | 12 0 2311 ------------------------------------- 2312 13 0 14 | 15 16 17 | 0 0 2313 Proc1 0 18 0 | 19 20 21 | 0 0 2314 0 0 0 | 22 23 0 | 24 0 2315 ------------------------------------- 2316 Proc2 25 26 27 | 0 0 28 | 29 0 2317 30 0 0 | 31 32 33 | 0 34 2318 .ve 2319 2320 This can be represented as a collection of submatrices as: 2321 2322 .vb 2323 A B C 2324 D E F 2325 G H I 2326 .ve 2327 2328 Where the submatrices A,B,C are owned by proc0, D,E,F are 2329 owned by proc1, G,H,I are owned by proc2. 2330 2331 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2332 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2333 The 'M','N' parameters are 8,8, and have the same values on all procs. 2334 2335 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2336 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2337 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2338 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2339 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2340 matrix, ans [DF] as another SeqAIJ matrix. 2341 2342 When d_nz, o_nz parameters are specified, d_nz storage elements are 2343 allocated for every row of the local diagonal submatrix, and o_nz 2344 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2345 One way to choose d_nz and o_nz is to use the max nonzerors per local 2346 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2347 In this case, the values of d_nz,o_nz are: 2348 .vb 2349 proc0 : dnz = 2, o_nz = 2 2350 proc1 : dnz = 3, o_nz = 2 2351 proc2 : dnz = 1, o_nz = 4 2352 .ve 2353 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2354 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2355 for proc3. i.e we are using 12+15+10=37 storage locations to store 2356 34 values. 2357 2358 When d_nnz, o_nnz parameters are specified, the storage is specified 2359 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2360 In the above case the values for d_nnz,o_nnz are: 2361 .vb 2362 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2363 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2364 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2365 .ve 2366 Here the space allocated is sum of all the above values i.e 34, and 2367 hence pre-allocation is perfect. 2368 2369 Level: intermediate 2370 2371 .keywords: matrix, aij, compressed row, sparse, parallel 2372 2373 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2374 @*/ 2375 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2376 { 2377 int ierr,(*f)(Mat,int,int*,int,int*); 2378 2379 PetscFunctionBegin; 2380 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2381 if (f) { 2382 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatCreateMPIAIJ" 2389 /*@C 2390 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2391 (the default parallel PETSc format). For good matrix assembly performance 2392 the user should preallocate the matrix storage by setting the parameters 2393 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2394 performance can be increased by more than a factor of 50. 2395 2396 Collective on MPI_Comm 2397 2398 Input Parameters: 2399 + comm - MPI communicator 2400 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2401 This value should be the same as the local size used in creating the 2402 y vector for the matrix-vector product y = Ax. 2403 . n - This value should be the same as the local size used in creating the 2404 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2405 calculated if N is given) For square matrices n is almost always m. 2406 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2407 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2408 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2409 (same value is used for all local rows) 2410 . d_nnz - array containing the number of nonzeros in the various rows of the 2411 DIAGONAL portion of the local submatrix (possibly different for each row) 2412 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2413 The size of this array is equal to the number of local rows, i.e 'm'. 2414 You must leave room for the diagonal entry even if it is zero. 2415 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2416 submatrix (same value is used for all local rows). 2417 - o_nnz - array containing the number of nonzeros in the various rows of the 2418 OFF-DIAGONAL portion of the local submatrix (possibly different for 2419 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2420 structure. The size of this array is equal to the number 2421 of local rows, i.e 'm'. 2422 2423 Output Parameter: 2424 . A - the matrix 2425 2426 Notes: 2427 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2428 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2429 storage requirements for this matrix. 2430 2431 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2432 processor than it must be used on all processors that share the object for 2433 that argument. 2434 2435 The AIJ format (also called the Yale sparse matrix format or 2436 compressed row storage), is fully compatible with standard Fortran 77 2437 storage. That is, the stored row and column indices can begin at 2438 either one (as in Fortran) or zero. See the users manual for details. 2439 2440 The user MUST specify either the local or global matrix dimensions 2441 (possibly both). 2442 2443 The parallel matrix is partitioned such that the first m0 rows belong to 2444 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2445 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2446 2447 The DIAGONAL portion of the local submatrix of a processor can be defined 2448 as the submatrix which is obtained by extraction the part corresponding 2449 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2450 first row that belongs to the processor, and r2 is the last row belonging 2451 to the this processor. This is a square mxm matrix. The remaining portion 2452 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2453 2454 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2455 2456 By default, this format uses inodes (identical nodes) when possible. 2457 We search for consecutive rows with the same nonzero structure, thereby 2458 reusing matrix information to achieve increased efficiency. 2459 2460 Options Database Keys: 2461 + -mat_aij_no_inode - Do not use inodes 2462 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2463 - -mat_aij_oneindex - Internally use indexing starting at 1 2464 rather than 0. Note that when calling MatSetValues(), 2465 the user still MUST index entries starting at 0! 2466 2467 2468 Example usage: 2469 2470 Consider the following 8x8 matrix with 34 non-zero values, that is 2471 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2472 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2473 as follows: 2474 2475 .vb 2476 1 2 0 | 0 3 0 | 0 4 2477 Proc0 0 5 6 | 7 0 0 | 8 0 2478 9 0 10 | 11 0 0 | 12 0 2479 ------------------------------------- 2480 13 0 14 | 15 16 17 | 0 0 2481 Proc1 0 18 0 | 19 20 21 | 0 0 2482 0 0 0 | 22 23 0 | 24 0 2483 ------------------------------------- 2484 Proc2 25 26 27 | 0 0 28 | 29 0 2485 30 0 0 | 31 32 33 | 0 34 2486 .ve 2487 2488 This can be represented as a collection of submatrices as: 2489 2490 .vb 2491 A B C 2492 D E F 2493 G H I 2494 .ve 2495 2496 Where the submatrices A,B,C are owned by proc0, D,E,F are 2497 owned by proc1, G,H,I are owned by proc2. 2498 2499 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2500 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2501 The 'M','N' parameters are 8,8, and have the same values on all procs. 2502 2503 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2504 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2505 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2506 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2507 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2508 matrix, ans [DF] as another SeqAIJ matrix. 2509 2510 When d_nz, o_nz parameters are specified, d_nz storage elements are 2511 allocated for every row of the local diagonal submatrix, and o_nz 2512 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2513 One way to choose d_nz and o_nz is to use the max nonzerors per local 2514 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2515 In this case, the values of d_nz,o_nz are: 2516 .vb 2517 proc0 : dnz = 2, o_nz = 2 2518 proc1 : dnz = 3, o_nz = 2 2519 proc2 : dnz = 1, o_nz = 4 2520 .ve 2521 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2522 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2523 for proc3. i.e we are using 12+15+10=37 storage locations to store 2524 34 values. 2525 2526 When d_nnz, o_nnz parameters are specified, the storage is specified 2527 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2528 In the above case the values for d_nnz,o_nnz are: 2529 .vb 2530 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2531 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2532 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2533 .ve 2534 Here the space allocated is sum of all the above values i.e 34, and 2535 hence pre-allocation is perfect. 2536 2537 Level: intermediate 2538 2539 .keywords: matrix, aij, compressed row, sparse, parallel 2540 2541 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2542 @*/ 2543 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2544 { 2545 int ierr,size; 2546 2547 PetscFunctionBegin; 2548 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2549 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2550 if (size > 1) { 2551 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2552 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2553 } else { 2554 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2555 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2562 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2563 { 2564 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2565 PetscFunctionBegin; 2566 *Ad = a->A; 2567 *Ao = a->B; 2568 *colmap = a->garray; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2574 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2575 { 2576 int ierr,i; 2577 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2578 2579 PetscFunctionBegin; 2580 if (coloring->ctype == IS_COLORING_LOCAL) { 2581 ISColoringValue *allcolors,*colors; 2582 ISColoring ocoloring; 2583 2584 /* set coloring for diagonal portion */ 2585 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2586 2587 /* set coloring for off-diagonal portion */ 2588 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2589 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2590 for (i=0; i<a->B->n; i++) { 2591 colors[i] = allcolors[a->garray[i]]; 2592 } 2593 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2594 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2595 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2596 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2597 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2598 ISColoringValue *colors; 2599 int *larray; 2600 ISColoring ocoloring; 2601 2602 /* set coloring for diagonal portion */ 2603 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2604 for (i=0; i<a->A->n; i++) { 2605 larray[i] = i + a->cstart; 2606 } 2607 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2608 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2609 for (i=0; i<a->A->n; i++) { 2610 colors[i] = coloring->colors[larray[i]]; 2611 } 2612 ierr = PetscFree(larray);CHKERRQ(ierr); 2613 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2614 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2615 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2616 2617 /* set coloring for off-diagonal portion */ 2618 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2619 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2620 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2621 for (i=0; i<a->B->n; i++) { 2622 colors[i] = coloring->colors[larray[i]]; 2623 } 2624 ierr = PetscFree(larray);CHKERRQ(ierr); 2625 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2626 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2627 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2628 } else { 2629 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2630 } 2631 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2637 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2638 { 2639 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2640 int ierr; 2641 2642 PetscFunctionBegin; 2643 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2644 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2645 PetscFunctionReturn(0); 2646 } 2647 2648 #undef __FUNCT__ 2649 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2650 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2651 { 2652 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2653 int ierr; 2654 2655 PetscFunctionBegin; 2656 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2657 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatMerge" 2663 /*@C 2664 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2665 matrices from each processor 2666 2667 Collective on MPI_Comm 2668 2669 Input Parameters: 2670 + comm - the communicators the parallel matrix will live on 2671 - inmat - the input sequential matrices 2672 2673 Output Parameter: 2674 . outmat - the parallel matrix generated 2675 2676 Level: advanced 2677 2678 Notes: The number of columns of the matrix in EACH of the seperate files 2679 MUST be the same. 2680 2681 @*/ 2682 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2683 { 2684 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2685 PetscScalar *values; 2686 PetscMap columnmap,rowmap; 2687 2688 PetscFunctionBegin; 2689 2690 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2691 2692 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2693 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2694 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2695 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2696 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2697 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2698 2699 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2700 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2701 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2702 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2703 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2704 2705 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2706 for (i=0;i<m;i++) { 2707 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2708 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2709 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2710 } 2711 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2712 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2713 2714 for (i=0;i<m;i++) { 2715 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2716 I = i + rstart; 2717 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2718 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2719 } 2720 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2721 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2722 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2723 2724 PetscFunctionReturn(0); 2725 } 2726 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatFileSplit" 2729 int MatFileSplit(Mat A,char *outfile) 2730 { 2731 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2732 PetscViewer out; 2733 char *name; 2734 Mat B; 2735 PetscScalar *values; 2736 2737 PetscFunctionBegin; 2738 2739 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2740 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2741 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2742 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2743 for (i=0;i<m;i++) { 2744 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2745 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2746 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2747 } 2748 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2749 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 2751 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2752 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2753 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2754 sprintf(name,"%s.%d",outfile,rank); 2755 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2756 ierr = PetscFree(name); 2757 ierr = MatView(B,out);CHKERRQ(ierr); 2758 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2759 ierr = MatDestroy(B);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762