1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 int CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 int n = aij->B->n,i,ierr; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra 
room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) 
\ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = 
PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 191 int cstart = aij->cstart,cend = aij->cend,row,col; 192 PetscTruth roworiented = aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 197 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 198 PetscScalar *aa = a->a; 199 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 202 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 203 PetscScalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 206 int nonew = a->nonew,shift=0; 207 PetscScalar *ap; 208 209 PetscFunctionBegin; 210 for (i=0; i<m; i++) { 211 if (im[i] < 0) continue; 212 #if defined(PETSC_USE_BOPT_g) 213 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 214 #endif 215 if (im[i] >= rstart && im[i] < 
rend) { 216 row = im[i] - rstart; 217 for (j=0; j<n; j++) { 218 if (in[j] >= cstart && in[j] < cend){ 219 col = in[j] - cstart; 220 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 221 if (ignorezeroentries && value == 0.0) continue; 222 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 223 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 224 } else if (in[j] < 0) continue; 225 #if defined(PETSC_USE_BOPT_g) 226 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 227 #endif 228 else { 229 if (mat->was_assembled) { 230 if (!aij->colmap) { 231 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 232 } 233 #if defined (PETSC_USE_CTABLE) 234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 235 col--; 236 #else 237 col = aij->colmap[in[j]] - 1; 238 #endif 239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 240 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 241 col = in[j]; 242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 243 B = aij->B; 244 b = (Mat_SeqAIJ*)B->data; 245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 246 ba = b->a; 247 } 248 } else col = in[j]; 249 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 250 if (ignorezeroentries && value == 0.0) continue; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (!aij->donotstash) { 257 if (roworiented) { 258 if (ignorezeroentries && v[i*n] == 0.0) continue; 259 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 260 } else { 261 if (ignorezeroentries && v[i] == 0.0) continue; 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetValues_MPIAIJ" 272 int 
MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 275 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 276 int cstart = aij->cstart,cend = aij->cend,row,col; 277 278 PetscFunctionBegin; 279 for (i=0; i<m; i++) { 280 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 281 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 282 if (idxm[i] >= rstart && idxm[i] < rend) { 283 row = idxm[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 286 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 287 if (idxn[j] >= cstart && idxn[j] < cend){ 288 col = idxn[j] - cstart; 289 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 290 } else { 291 if (!aij->colmap) { 292 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 293 } 294 #if defined (PETSC_USE_CTABLE) 295 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 296 col --; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 318 int ierr,nstash,reallocs; 319 InsertMode addv; 320 321 PetscFunctionBegin; 322 if (aij->donotstash) { 323 PetscFunctionReturn(0); 324 } 325 326 /* make sure all processors are either in INSERTMODE or ADDMODE */ 327 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 328 if (addv == (ADD_VALUES|INSERT_VALUES)) { 329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 330 } 331 mat->insertmode = addv; /* in case this processor had no cache */ 332 333 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 334 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 335 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 336 PetscFunctionReturn(0); 337 } 338 339 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 343 { 344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 345 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 346 int i,j,rstart,ncols,n,ierr,flg; 347 int *row,*col,other_disassembled; 348 PetscScalar *val; 349 InsertMode addv = mat->insertmode; 350 351 PetscFunctionBegin; 352 if (!aij->donotstash) { 353 while (1) { 354 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 355 if (!flg) break; 356 357 for (i=0; i<n;) { 358 /* Now identify the consecutive vals belonging to the same row */ 359 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 360 if (j < n) ncols = j-i; 361 else ncols = n-i; 362 /* Now assemble all these values with a single function call */ 363 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 364 i = j; 365 } 366 } 367 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 368 } 369 370 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 371 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 372 373 /* determine if any processor has disassembled, if so we must 374 also disassemble ourselfs, in order that we may reassemble. 
*/ 375 /* 376 if nonzero structure of submatrix B cannot change then we know that 377 no processor disassembled thus we can skip this stuff 378 */ 379 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 380 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 381 if (mat->was_assembled && !other_disassembled) { 382 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 383 } 384 } 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 391 392 if (aij->rowvalues) { 393 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 394 aij->rowvalues = 0; 395 } 396 397 /* used by MatAXPY() */ 398 a->xtoy = 0; b->xtoy = 0; 399 a->XtoY = 0; b->XtoY = 0; 400 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 422 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 423 int *nprocs,j,idx,nsends,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 PetscTruth found; 432 433 PetscFunctionBegin; 434 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 435 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 436 437 /* first count number of contributors to each 
processor */ 438 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 439 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 440 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 441 for (i=0; i<N; i++) { 442 idx = rows[i]; 443 found = PETSC_FALSE; 444 for (j=0; j<size; j++) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 450 } 451 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 452 453 /* inform other processors of number of messages and max length*/ 454 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 455 456 /* post receives: */ 457 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 458 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 459 for (i=0; i<nrecvs; i++) { 460 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 461 } 462 463 /* do sends: 464 1) starts[i] gives the starting index in svalues for stuff going to 465 the ith processor 466 */ 467 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 468 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 469 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 470 starts[0] = 0; 471 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 472 for (i=0; i<N; i++) { 473 svalues[starts[owner[i]]++] = rows[i]; 474 } 475 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 476 477 starts[0] = 0; 478 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 479 count = 0; 480 for (i=0; i<size; i++) { 481 if (nprocs[2*i+1]) { 482 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 483 } 484 } 485 ierr = PetscFree(starts);CHKERRQ(ierr); 486 487 base = owners[rank]; 488 489 /* 
wait on receives */ 490 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 491 source = lens + nrecvs; 492 count = nrecvs; slen = 0; 493 while (count) { 494 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 495 /* unpack receives into our local space */ 496 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 497 source[imdex] = recv_status.MPI_SOURCE; 498 lens[imdex] = n; 499 slen += n; 500 count--; 501 } 502 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 503 504 /* move the data into the send scatter */ 505 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 506 count = 0; 507 for (i=0; i<nrecvs; i++) { 508 values = rvalues + i*nmax; 509 for (j=0; j<lens[i]; j++) { 510 lrows[count++] = values[j] - base; 511 } 512 } 513 ierr = PetscFree(rvalues);CHKERRQ(ierr); 514 ierr = PetscFree(lens);CHKERRQ(ierr); 515 ierr = PetscFree(owner);CHKERRQ(ierr); 516 ierr = PetscFree(nprocs);CHKERRQ(ierr); 517 518 /* actually zap the local rows */ 519 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 520 PetscLogObjectParent(A,istmp); 521 522 /* 523 Zero the required rows. If the "diagonal block" of the matrix 524 is square and the user wishes to set the diagonal we use seperate 525 code so that MatSetValues() is not called for each diagonal allocating 526 new memory, thus calling lots of mallocs and slowing things down. 
527 528 Contributed by: Mathew Knepley 529 */ 530 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 531 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 532 if (diag && (l->A->M == l->A->N)) { 533 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 534 } else if (diag) { 535 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 536 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 537 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 539 } 540 for (i = 0; i < slen; i++) { 541 row = lrows[i] + rstart; 542 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } else { 547 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 548 } 549 ierr = ISDestroy(istmp);CHKERRQ(ierr); 550 ierr = PetscFree(lrows);CHKERRQ(ierr); 551 552 /* wait on sends */ 553 if (nsends) { 554 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 555 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 556 ierr = PetscFree(send_status);CHKERRQ(ierr); 557 } 558 ierr = PetscFree(send_waits);CHKERRQ(ierr); 559 ierr = PetscFree(svalues);CHKERRQ(ierr); 560 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMult_MPIAIJ" 566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 567 { 568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 569 int ierr,nt; 570 571 PetscFunctionBegin; 572 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 573 if (nt != A->n) { 574 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 575 } 576 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 577 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 578 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 579 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultAdd_MPIAIJ" 585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 588 int ierr; 589 590 PetscFunctionBegin; 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 /* do nondiagonal part */ 607 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 608 /* send it on its way */ 609 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 610 /* do local part */ 611 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 612 /* receive remote parts: note this assumes the values are not actually */ 613 /* inserted in yy until the next line, which is true for my implementation*/ 614 /* but is not perhaps always true. 
*/ 615 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 EXTERN_C_BEGIN 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 623 { 624 MPI_Comm comm; 625 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 626 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 627 IS Me,Notme; 628 int M,N,first,last,*notme,ntids,i, ierr; 629 630 PetscFunctionBegin; 631 632 /* Easy test: symmetric diagonal block */ 633 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 634 ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr); 635 if (!*f) PetscFunctionReturn(0); 636 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 637 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 638 if (ntids==1) PetscFunctionReturn(0); 639 640 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 641 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 642 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 643 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 644 for (i=0; i<first; i++) notme[i] = i; 645 for (i=last; i<M; i++) notme[i-last+first] = i; 646 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 647 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 648 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 649 Aoff = Aoffs[0]; 650 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 651 Boff = Boffs[0]; 652 ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 654 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 655 ierr = ISDestroy(Me); CHKERRQ(ierr); 656 ierr = ISDestroy(Notme); CHKERRQ(ierr); 657 658 PetscFunctionReturn(0); 659 } 660 EXTERN_C_END 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 664 int 
MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 665 { 666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 667 int ierr; 668 669 PetscFunctionBegin; 670 /* do nondiagonal part */ 671 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 672 /* send it on its way */ 673 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 674 /* do local part */ 675 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 676 /* receive remote parts: note this assumes the values are not actually */ 677 /* inserted in yy until the next line, which is true for my implementation*/ 678 /* but is not perhaps always true. */ 679 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 /* 684 This only works correctly for square matrices where the subblock A->A is the 685 diagonal block 686 */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 690 { 691 int ierr; 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 694 PetscFunctionBegin; 695 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 698 } 699 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatScale_MPIAIJ" 705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 706 { 707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 708 int ierr; 709 710 PetscFunctionBegin; 711 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 712 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatDestroy_MPIAIJ" 718 int MatDestroy_MPIAIJ(Mat mat) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 int ierr; 722 723 PetscFunctionBegin; 724 #if defined(PETSC_USE_LOG) 725 
PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 726 #endif 727 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 728 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 731 #if defined (PETSC_USE_CTABLE) 732 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 733 #else 734 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 735 #endif 736 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 737 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 738 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 739 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 740 ierr = PetscFree(aij);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 752 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (rank == 0) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->M; 762 header[2] = mat->N; 763 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->m; 768 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 769 for (i=1; i<size; i++) { 770 
rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->m; 775 } 776 777 /* load up the local row counts */ 778 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->m; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (rank == 0) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->m; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (rank == 0) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal 
PETSc error: nz = %d nzmax = %d",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->m; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (rank == 0) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 
872 int ierr,rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,isascii,flg,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (isascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 886 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 888 if (flg) { 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 890 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 891 } else { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 893 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 894 } 895 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 897 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 899 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 900 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } else if (format == PETSC_VIEWER_ASCII_INFO) { 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 905 PetscFunctionReturn(0); 906 } 907 } else if (isbinary) { 908 if (size 
== 1) { 909 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 910 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 911 } else { 912 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } else if (isdraw) { 916 PetscDraw draw; 917 PetscTruth isnull; 918 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 919 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 920 } 921 922 if (size == 1) { 923 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 924 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 925 } else { 926 /* assemble the entire matrix onto first processor. */ 927 Mat A; 928 Mat_SeqAIJ *Aloc; 929 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 930 PetscScalar *a; 931 932 if (!rank) { 933 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 934 } else { 935 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 936 } 937 PetscLogObjectParent(mat,A); 938 939 /* copy over the A part */ 940 Aloc = (Mat_SeqAIJ*)aij->A->data; 941 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 942 row = aij->rstart; 943 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 944 for (i=0; i<m; i++) { 945 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 946 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 947 } 948 aj = Aloc->j; 949 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 950 951 /* copy over the B part */ 952 Aloc = (Mat_SeqAIJ*)aij->B->data; 953 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = aij->rstart; 955 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 956 ct = cols; 957 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 961 } 962 ierr = 
PetscFree(ct);CHKERRQ(ierr); 963 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 964 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 /* 966 Everyone has to call to draw the matrix since the graphics waits are 967 synchronized across all processors that share the PetscDraw object 968 */ 969 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 970 if (!rank) { 971 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 972 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 973 } 974 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 975 ierr = MatDestroy(A);CHKERRQ(ierr); 976 } 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatView_MPIAIJ" 982 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 983 { 984 int ierr; 985 PetscTruth isascii,isdraw,issocket,isbinary; 986 987 PetscFunctionBegin; 988 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 989 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 990 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 992 if (isascii || isdraw || isbinary || issocket) { 993 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 994 } else { 995 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 996 } 997 PetscFunctionReturn(0); 998 } 999 1000 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatRelax_MPIAIJ" 1004 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1005 { 1006 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1007 int ierr; 1008 Vec bb1; 1009 PetscScalar mone=-1.0; 1010 1011 PetscFunctionBegin; 1012 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation 
requires global its %d and local its %d both positive",its,lits); 1013 1014 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1015 1016 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1017 if (flag & SOR_ZERO_INITIAL_GUESS) { 1018 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1019 its--; 1020 } 1021 1022 while (its--) { 1023 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1024 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1025 1026 /* update rhs: bb1 = bb - B*x */ 1027 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1028 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1029 1030 /* local sweep */ 1031 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1032 CHKERRQ(ierr); 1033 } 1034 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1035 if (flag & SOR_ZERO_INITIAL_GUESS) { 1036 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1037 its--; 1038 } 1039 while (its--) { 1040 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 1043 /* update rhs: bb1 = bb - B*x */ 1044 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1045 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1046 1047 /* local sweep */ 1048 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1049 CHKERRQ(ierr); 1050 } 1051 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1052 if (flag & SOR_ZERO_INITIAL_GUESS) { 1053 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1054 its--; 1055 } 1056 while (its--) { 1057 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 ierr = 
VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1059 1060 /* update rhs: bb1 = bb - B*x */ 1061 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1063 1064 /* local sweep */ 1065 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1066 CHKERRQ(ierr); 1067 } 1068 } else { 1069 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1070 } 1071 1072 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1078 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1079 { 1080 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1081 Mat A = mat->A,B = mat->B; 1082 int ierr; 1083 PetscReal isend[5],irecv[5]; 1084 1085 PetscFunctionBegin; 1086 info->block_size = 1.0; 1087 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1088 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1089 isend[3] = info->memory; isend[4] = info->mallocs; 1090 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1091 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1092 isend[3] += info->memory; isend[4] += info->mallocs; 1093 if (flag == MAT_LOCAL) { 1094 info->nz_used = isend[0]; 1095 info->nz_allocated = isend[1]; 1096 info->nz_unneeded = isend[2]; 1097 info->memory = isend[3]; 1098 info->mallocs = isend[4]; 1099 } else if (flag == MAT_GLOBAL_MAX) { 1100 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1101 info->nz_used = irecv[0]; 1102 info->nz_allocated = irecv[1]; 1103 info->nz_unneeded = irecv[2]; 1104 info->memory = irecv[3]; 1105 info->mallocs = irecv[4]; 1106 } else if (flag == MAT_GLOBAL_SUM) { 1107 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1108 info->nz_used = irecv[0]; 1109 info->nz_allocated = irecv[1]; 
1110 info->nz_unneeded = irecv[2]; 1111 info->memory = irecv[3]; 1112 info->mallocs = irecv[4]; 1113 } 1114 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1115 info->fill_ratio_needed = 0; 1116 info->factor_mallocs = 0; 1117 info->rows_global = (double)matin->M; 1118 info->columns_global = (double)matin->N; 1119 info->rows_local = (double)matin->m; 1120 info->columns_local = (double)matin->N; 1121 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatSetOption_MPIAIJ" 1127 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1128 { 1129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1130 int ierr; 1131 1132 PetscFunctionBegin; 1133 switch (op) { 1134 case MAT_NO_NEW_NONZERO_LOCATIONS: 1135 case MAT_YES_NEW_NONZERO_LOCATIONS: 1136 case MAT_COLUMNS_UNSORTED: 1137 case MAT_COLUMNS_SORTED: 1138 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1139 case MAT_KEEP_ZEROED_ROWS: 1140 case MAT_NEW_NONZERO_LOCATION_ERR: 1141 case MAT_USE_INODES: 1142 case MAT_DO_NOT_USE_INODES: 1143 case MAT_IGNORE_ZERO_ENTRIES: 1144 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1145 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1146 break; 1147 case MAT_ROW_ORIENTED: 1148 a->roworiented = PETSC_TRUE; 1149 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1150 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1151 break; 1152 case MAT_ROWS_SORTED: 1153 case MAT_ROWS_UNSORTED: 1154 case MAT_YES_NEW_DIAGONALS: 1155 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1156 break; 1157 case MAT_COLUMN_ORIENTED: 1158 a->roworiented = PETSC_FALSE; 1159 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1160 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1161 break; 1162 case MAT_IGNORE_OFF_PROC_ENTRIES: 1163 a->donotstash = PETSC_TRUE; 1164 break; 1165 case MAT_NO_NEW_DIAGONALS: 1166 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1167 case MAT_SYMMETRIC: 1168 case MAT_STRUCTURALLY_SYMMETRIC: 1169 case MAT_NOT_SYMMETRIC: 1170 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 case 
MAT_NOT_HERMITIAN: 1173 case MAT_SYMMETRY_ETERNAL: 1174 case MAT_NOT_SYMMETRY_ETERNAL: 1175 break; 1176 default: 1177 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatGetRow_MPIAIJ" 1184 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1185 { 1186 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1187 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1188 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1189 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1190 int *cmap,*idx_p; 1191 1192 PetscFunctionBegin; 1193 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1194 mat->getrowactive = PETSC_TRUE; 1195 1196 if (!mat->rowvalues && (idx || v)) { 1197 /* 1198 allocate enough space to hold information from the longest row. 1199 */ 1200 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1201 int max = 1,tmp; 1202 for (i=0; i<matin->m; i++) { 1203 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1204 if (max < tmp) { max = tmp; } 1205 } 1206 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1207 mat->rowindices = (int*)(mat->rowvalues + max); 1208 } 1209 1210 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1211 lrow = row - rstart; 1212 1213 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1214 if (!v) {pvA = 0; pvB = 0;} 1215 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1216 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1218 nztot = nzA + nzB; 1219 1220 cmap = mat->garray; 1221 if (v || idx) { 1222 if (nztot) { 1223 /* Sort by increasing column numbers, assuming A and B already sorted */ 1224 int imark = -1; 1225 if (v) { 1226 *v = v_p = mat->rowvalues; 1227 for (i=0; i<nzB; i++) { 1228 if 
(cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1229 else break; 1230 } 1231 imark = i; 1232 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1233 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1234 } 1235 if (idx) { 1236 *idx = idx_p = mat->rowindices; 1237 if (imark > -1) { 1238 for (i=0; i<imark; i++) { 1239 idx_p[i] = cmap[cworkB[i]]; 1240 } 1241 } else { 1242 for (i=0; i<nzB; i++) { 1243 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1244 else break; 1245 } 1246 imark = i; 1247 } 1248 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1249 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1250 } 1251 } else { 1252 if (idx) *idx = 0; 1253 if (v) *v = 0; 1254 } 1255 } 1256 *nz = nztot; 1257 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1258 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1264 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1265 { 1266 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1267 1268 PetscFunctionBegin; 1269 if (aij->getrowactive == PETSC_FALSE) { 1270 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1271 } 1272 aij->getrowactive = PETSC_FALSE; 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatNorm_MPIAIJ" 1278 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1279 { 1280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1281 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1282 int ierr,i,j,cstart = aij->cstart; 1283 PetscReal sum = 0.0; 1284 PetscScalar *v; 1285 1286 PetscFunctionBegin; 1287 if (aij->size == 1) { 1288 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1289 } else { 1290 if (type == NORM_FROBENIUS) { 1291 v = amat->a; 1292 for (i=0; i<amat->nz; i++) { 1293 #if defined(PETSC_USE_COMPLEX) 1294 sum += PetscRealPart(PetscConj(*v)*(*v)); 
v++; 1295 #else 1296 sum += (*v)*(*v); v++; 1297 #endif 1298 } 1299 v = bmat->a; 1300 for (i=0; i<bmat->nz; i++) { 1301 #if defined(PETSC_USE_COMPLEX) 1302 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1303 #else 1304 sum += (*v)*(*v); v++; 1305 #endif 1306 } 1307 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1308 *norm = sqrt(*norm); 1309 } else if (type == NORM_1) { /* max column norm */ 1310 PetscReal *tmp,*tmp2; 1311 int *jj,*garray = aij->garray; 1312 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1313 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1314 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1315 *norm = 0.0; 1316 v = amat->a; jj = amat->j; 1317 for (j=0; j<amat->nz; j++) { 1318 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1319 } 1320 v = bmat->a; jj = bmat->j; 1321 for (j=0; j<bmat->nz; j++) { 1322 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1323 } 1324 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1325 for (j=0; j<mat->N; j++) { 1326 if (tmp2[j] > *norm) *norm = tmp2[j]; 1327 } 1328 ierr = PetscFree(tmp);CHKERRQ(ierr); 1329 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1330 } else if (type == NORM_INFINITY) { /* max row norm */ 1331 PetscReal ntemp = 0.0; 1332 for (j=0; j<aij->A->m; j++) { 1333 v = amat->a + amat->i[j]; 1334 sum = 0.0; 1335 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1336 sum += PetscAbsScalar(*v); v++; 1337 } 1338 v = bmat->a + bmat->i[j]; 1339 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1340 sum += PetscAbsScalar(*v); v++; 1341 } 1342 if (sum > ntemp) ntemp = sum; 1343 } 1344 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1345 } else { 1346 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1347 } 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatTranspose_MPIAIJ" 1354 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1355 { 1356 
Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1357 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1358 int ierr; 1359 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1360 Mat B; 1361 PetscScalar *array; 1362 1363 PetscFunctionBegin; 1364 if (!matout && M != N) { 1365 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1366 } 1367 1368 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1369 1370 /* copy over the A part */ 1371 Aloc = (Mat_SeqAIJ*)a->A->data; 1372 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1373 row = a->rstart; 1374 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1375 for (i=0; i<m; i++) { 1376 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1377 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1378 } 1379 aj = Aloc->j; 1380 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1381 1382 /* copy over the B part */ 1383 Aloc = (Mat_SeqAIJ*)a->B->data; 1384 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1385 row = a->rstart; 1386 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1387 ct = cols; 1388 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1392 } 1393 ierr = PetscFree(ct);CHKERRQ(ierr); 1394 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1396 if (matout) { 1397 *matout = B; 1398 } else { 1399 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1406 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1407 { 1408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1409 Mat a = aij->A,b = aij->B; 1410 int ierr,s1,s2,s3; 1411 1412 PetscFunctionBegin; 1413 ierr = 
MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1414 if (rr) { 1415 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1416 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1417 /* Overlap communication with computation. */ 1418 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1419 } 1420 if (ll) { 1421 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1422 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1423 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1424 } 1425 /* scale the diagonal block */ 1426 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1427 1428 if (rr) { 1429 /* Do a scatter end and then right scale the off-diagonal block */ 1430 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1431 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1432 } 1433 1434 PetscFunctionReturn(0); 1435 } 1436 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1440 int MatPrintHelp_MPIAIJ(Mat A) 1441 { 1442 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1443 int ierr; 1444 1445 PetscFunctionBegin; 1446 if (!a->rank) { 1447 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1454 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1455 { 1456 PetscFunctionBegin; 1457 *bs = 1; 1458 PetscFunctionReturn(0); 1459 } 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1462 int MatSetUnfactored_MPIAIJ(Mat A) 1463 { 1464 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1465 int ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatEqual_MPIAIJ" 1474 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1475 { 1476 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 
1477 Mat a,b,c,d; 1478 PetscTruth flg; 1479 int ierr; 1480 1481 PetscFunctionBegin; 1482 a = matA->A; b = matA->B; 1483 c = matB->A; d = matB->B; 1484 1485 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1486 if (flg == PETSC_TRUE) { 1487 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1488 } 1489 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatCopy_MPIAIJ" 1495 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1496 { 1497 int ierr; 1498 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1499 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1500 1501 PetscFunctionBegin; 1502 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1503 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1504 /* because of the column compression in the off-processor part of the matrix a->B, 1505 the number of columns in a->B and b->B may be different, hence we cannot call 1506 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1507 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1508 then copying the submatrices */ 1509 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1510 } else { 1511 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1512 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1513 } 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1519 int MatSetUpPreallocation_MPIAIJ(Mat A) 1520 { 1521 int ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #include "petscblaslapack.h" 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatAXPY_MPIAIJ" 1531 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1532 { 1533 int ierr,one=1,i; 1534 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1535 Mat_SeqAIJ *x,*y; 1536 1537 PetscFunctionBegin; 1538 if (str == SAME_NONZERO_PATTERN) { 1539 x = (Mat_SeqAIJ *)xx->A->data; 1540 y = (Mat_SeqAIJ *)yy->A->data; 1541 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1542 x = (Mat_SeqAIJ *)xx->B->data; 1543 y = (Mat_SeqAIJ *)yy->B->data; 1544 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1545 } else if (str == SUBSET_NONZERO_PATTERN) { 1546 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1547 1548 x = (Mat_SeqAIJ *)xx->B->data; 1549 y = (Mat_SeqAIJ *)yy->B->data; 1550 if (y->xtoy && y->XtoY != xx->B) { 1551 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1552 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1553 } 1554 if (!y->xtoy) { /* get xtoy */ 1555 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1556 y->XtoY = xx->B; 1557 } 1558 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1559 } else { 1560 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* 
-------------------------------------------------------------------*/ 1566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1567 MatGetRow_MPIAIJ, 1568 MatRestoreRow_MPIAIJ, 1569 MatMult_MPIAIJ, 1570 /* 4*/ MatMultAdd_MPIAIJ, 1571 MatMultTranspose_MPIAIJ, 1572 MatMultTransposeAdd_MPIAIJ, 1573 0, 1574 0, 1575 0, 1576 /*10*/ 0, 1577 0, 1578 0, 1579 MatRelax_MPIAIJ, 1580 MatTranspose_MPIAIJ, 1581 /*15*/ MatGetInfo_MPIAIJ, 1582 MatEqual_MPIAIJ, 1583 MatGetDiagonal_MPIAIJ, 1584 MatDiagonalScale_MPIAIJ, 1585 MatNorm_MPIAIJ, 1586 /*20*/ MatAssemblyBegin_MPIAIJ, 1587 MatAssemblyEnd_MPIAIJ, 1588 0, 1589 MatSetOption_MPIAIJ, 1590 MatZeroEntries_MPIAIJ, 1591 /*25*/ MatZeroRows_MPIAIJ, 1592 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1593 MatLUFactorSymbolic_MPIAIJ_TFS, 1594 #else 1595 0, 1596 #endif 1597 0, 1598 0, 1599 0, 1600 /*30*/ MatSetUpPreallocation_MPIAIJ, 1601 0, 1602 0, 1603 0, 1604 0, 1605 /*35*/ MatDuplicate_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 0, 1610 /*40*/ MatAXPY_MPIAIJ, 1611 MatGetSubMatrices_MPIAIJ, 1612 MatIncreaseOverlap_MPIAIJ, 1613 MatGetValues_MPIAIJ, 1614 MatCopy_MPIAIJ, 1615 /*45*/ MatPrintHelp_MPIAIJ, 1616 MatScale_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 /*50*/ MatGetBlockSize_MPIAIJ, 1621 0, 1622 0, 1623 0, 1624 0, 1625 /*55*/ MatFDColoringCreate_MPIAIJ, 1626 0, 1627 MatSetUnfactored_MPIAIJ, 1628 0, 1629 0, 1630 /*60*/ MatGetSubMatrix_MPIAIJ, 1631 MatDestroy_MPIAIJ, 1632 MatView_MPIAIJ, 1633 MatGetPetscMaps_Petsc, 1634 0, 1635 /*65*/ 0, 1636 0, 1637 0, 1638 0, 1639 0, 1640 /*70*/ 0, 1641 0, 1642 MatSetColoring_MPIAIJ, 1643 MatSetValuesAdic_MPIAIJ, 1644 MatSetValuesAdifor_MPIAIJ, 1645 /*75*/ 0, 1646 0, 1647 0, 1648 0, 1649 0, 1650 /*80*/ 0, 1651 0, 1652 0, 1653 0, 1654 /*85*/ MatLoad_MPIAIJ}; 1655 1656 /* ----------------------------------------------------------------------------------------*/ 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1661 int MatStoreValues_MPIAIJ(Mat 
mat) 1662 { 1663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1664 int ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 EXTERN_C_END 1672 1673 EXTERN_C_BEGIN 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1676 int MatRetrieveValues_MPIAIJ(Mat mat) 1677 { 1678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1679 int ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 EXTERN_C_END 1687 1688 #include "petscpc.h" 1689 EXTERN_C_BEGIN 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1692 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1693 { 1694 Mat_MPIAIJ *b; 1695 int ierr,i; 1696 1697 PetscFunctionBegin; 1698 B->preallocated = PETSC_TRUE; 1699 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1700 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1701 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1702 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1703 if (d_nnz) { 1704 for (i=0; i<B->m; i++) { 1705 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1706 } 1707 } 1708 if (o_nnz) { 1709 for (i=0; i<B->m; i++) { 1710 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1711 } 1712 } 1713 b = (Mat_MPIAIJ*)B->data; 1714 1715 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1716 PetscLogObjectParent(B,b->A); 1717 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1718 PetscLogObjectParent(B,b->B); 1719 1720 
PetscFunctionReturn(0); 1721 } 1722 EXTERN_C_END 1723 1724 /*MC 1725 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1726 1727 Options Database Keys: 1728 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1729 1730 Level: beginner 1731 1732 .seealso: MatCreateMPIAIJ 1733 M*/ 1734 1735 EXTERN_C_BEGIN 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatCreate_MPIAIJ" 1738 int MatCreate_MPIAIJ(Mat B) 1739 { 1740 Mat_MPIAIJ *b; 1741 int ierr,i,size; 1742 1743 PetscFunctionBegin; 1744 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1745 1746 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1747 B->data = (void*)b; 1748 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1749 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1750 B->factor = 0; 1751 B->assembled = PETSC_FALSE; 1752 B->mapping = 0; 1753 1754 B->insertmode = NOT_SET_VALUES; 1755 b->size = size; 1756 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1757 1758 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1759 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1760 1761 /* the information in the maps duplicates the information computed below, eventually 1762 we should remove the duplicate information that is not contained in the maps */ 1763 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1764 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1765 1766 /* build local table of row and column ownerships */ 1767 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1768 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1769 b->cowners = b->rowners + b->size + 2; 1770 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1771 b->rowners[0] = 0; 1772 for (i=2; i<=b->size; i++) { 1773 b->rowners[i] += b->rowners[i-1]; 1774 } 1775 b->rstart = 
b->rowners[b->rank]; 1776 b->rend = b->rowners[b->rank+1]; 1777 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1778 b->cowners[0] = 0; 1779 for (i=2; i<=b->size; i++) { 1780 b->cowners[i] += b->cowners[i-1]; 1781 } 1782 b->cstart = b->cowners[b->rank]; 1783 b->cend = b->cowners[b->rank+1]; 1784 1785 /* build cache for off array entries formed */ 1786 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1787 b->donotstash = PETSC_FALSE; 1788 b->colmap = 0; 1789 b->garray = 0; 1790 b->roworiented = PETSC_TRUE; 1791 1792 /* stuff used for matrix vector multiply */ 1793 b->lvec = PETSC_NULL; 1794 b->Mvctx = PETSC_NULL; 1795 1796 /* stuff for MatGetRow() */ 1797 b->rowindices = 0; 1798 b->rowvalues = 0; 1799 b->getrowactive = PETSC_FALSE; 1800 1801 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1802 "MatStoreValues_MPIAIJ", 1803 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1804 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1805 "MatRetrieveValues_MPIAIJ", 1806 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1807 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1808 "MatGetDiagonalBlock_MPIAIJ", 1809 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1811 "MatIsTranspose_MPIAIJ", 1812 MatIsTranspose_MPIAIJ); CHKERRQ(ierr); 1813 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1814 "MatMPIAIJSetPreallocation_MPIAIJ", 1815 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1816 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1817 "MatDiagonalScaleLocal_MPIAIJ", 1818 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 EXTERN_C_END 1822 1823 /*MC 1824 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 
This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_AIJ"
/*
   MatCreate_AIJ - Constructor for the generic "aij" type: delegates to
   MATSEQAIJ on a one-process communicator and to MATMPIAIJ otherwise.
*/
int MatCreate_AIJ(Mat A) {
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Duplicates a parallel AIJ matrix: copies the ownership
   tables, the column map and garray, duplicates the local-vector/scatter
   context, and duplicates the two sequential blocks (values copied only as
   directed by cpvalues).  The duplicate is marked assembled.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners and cowners share one allocation of 2*(size+2) ints, so a single
     copy moves both tables */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format.  Only process
   0 reads the file (header, row lengths, column indices, values) and ships
   each other process its own contiguous block of rows.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = 
MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file; header is {cookie, M, N, format} */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: first (M % size) ranks get one extra row */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the ourlens allocation */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; ourlens temporarily holds only the diagonal-block counts */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  /* restore ourlens to full row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ 
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential scratch submatrix stashed on *newmat by a previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /* 
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal 
        portions of the matrix in order to do correct preallocation
    */

    /* first get 
start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths; olens shares the dlens allocation */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* walk the CSR arrays of the sequential submatrix, inserting one row at a time */
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ
   format (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix 
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e 'm'. 
           You must leave room for the diagonal entry even if it is zero.
. 
o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extraction the part corresponding 
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor, and r2 is the last row belonging 
   to this processor. This is a square mxm matrix. The remaining portion 
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local 
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on B, if any;
     quietly does nothing for matrix types that do not provide one */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e 'm'. 
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix 

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor than it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extraction the part corresponding 
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor, and r2 is the last row belonging 
   to this processor. This is a square mxm matrix. The remaining portion 
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local 
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a one-process communicator a SEQAIJ matrix is produced instead (see Notes) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   MatMPIAIJGetSeqAIJ - Hands back the internal pieces of an MPIAIJ matrix:
   Ad the diagonal-block SeqAIJ matrix, Ao the off-diagonal-block SeqAIJ
   matrix, and colmap (garray) mapping Ao's local column numbers to global
   column numbers.  The references returned are the matrix's own; the caller
   must not destroy them.
*/
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Propagates an ISColoring onto the two sequential
   blocks of the matrix, translating the color indices into each block's
   local column numbering.  Supports IS_COLORING_LOCAL and
   IS_COLORING_GHOSTED colorings only.
*/
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the global color array,
       then pick out the colors of the columns this process references
       (garray[i] is the global column of local column i of B) */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = 
ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2588 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2589 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2590 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2591 ISColoringValue *colors; 2592 int *larray; 2593 ISColoring ocoloring; 2594 2595 /* set coloring for diagonal portion */ 2596 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2597 for (i=0; i<a->A->n; i++) { 2598 larray[i] = i + a->cstart; 2599 } 2600 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2601 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2602 for (i=0; i<a->A->n; i++) { 2603 colors[i] = coloring->colors[larray[i]]; 2604 } 2605 ierr = PetscFree(larray);CHKERRQ(ierr); 2606 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2607 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2608 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2609 2610 /* set coloring for off-diagonal portion */ 2611 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2612 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2613 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2614 for (i=0; i<a->B->n; i++) { 2615 colors[i] = coloring->colors[larray[i]]; 2616 } 2617 ierr = PetscFree(larray);CHKERRQ(ierr); 2618 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2619 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2620 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2621 } else { 2622 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2623 } 2624 2625 PetscFunctionReturn(0); 2626 } 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2630 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2631 { 2632 
Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2633 int ierr; 2634 2635 PetscFunctionBegin; 2636 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2637 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2638 PetscFunctionReturn(0); 2639 } 2640 2641 #undef __FUNCT__ 2642 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2643 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2644 { 2645 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2646 int ierr; 2647 2648 PetscFunctionBegin; 2649 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2650 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #undef __FUNCT__ 2655 #define __FUNCT__ "MatMerge" 2656 /*@C 2657 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2658 matrices from each processor 2659 2660 Collective on MPI_Comm 2661 2662 Input Parameters: 2663 + comm - the communicators the parallel matrix will live on 2664 - inmat - the input sequential matrices 2665 2666 Output Parameter: 2667 . outmat - the parallel matrix generated 2668 2669 Level: advanced 2670 2671 Notes: The number of columns of the matrix in EACH of the seperate files 2672 MUST be the same. 

@*/
int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
{
  int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
  PetscScalar *values;
  PetscMap    columnmap,rowmap;

  PetscFunctionBegin;

  /* inmat is sequential, so its global sizes are this process's local sizes */
  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
  /* A throwaway PetscMap is used only to compute this process's share (n) of
     the global columns; the map itself is destroyed immediately. */
  ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
  ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
  ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
  ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);

  /* likewise compute the first global row (rstart) this process contributes */
  ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
  ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
  ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

  /* first pass: count diagonal/off-diagonal nonzeros per row for preallocation */
  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* second pass: copy the local rows into the parallel matrix, offset by rstart */
  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the input matrix is consumed by this routine */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
2722 int MatFileSplit(Mat A,char *outfile) 2723 { 2724 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2725 PetscViewer out; 2726 char *name; 2727 Mat B; 2728 PetscScalar *values; 2729 2730 PetscFunctionBegin; 2731 2732 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2733 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2734 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2735 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2736 for (i=0;i<m;i++) { 2737 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2738 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2739 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2740 } 2741 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2743 2744 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2745 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2746 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2747 sprintf(name,"%s.%d",outfile,rank); 2748 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2749 ierr = PetscFree(name); 2750 ierr = MatView(B,out);CHKERRQ(ierr); 2751 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2752 ierr = MatDestroy(B);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755