1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 int CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 int n = aij->B->n,i,ierr; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 191 int cstart = aij->cstart,cend = aij->cend,row,col; 192 PetscTruth roworiented = aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 197 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 198 PetscScalar *aa = a->a; 199 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 202 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 203 PetscScalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 206 int nonew = a->nonew,shift=0; 207 PetscScalar *ap; 208 209 PetscFunctionBegin; 210 for (i=0; i<m; i++) { 211 if (im[i] < 0) continue; 212 #if defined(PETSC_USE_BOPT_g) 213 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 214 #endif 215 if (im[i] >= rstart && im[i] < rend) { 216 row = im[i] - rstart; 217 for (j=0; j<n; j++) { 218 if (in[j] >= cstart && in[j] < cend){ 219 col = in[j] - cstart; 220 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 221 if (ignorezeroentries && value == 0.0) continue; 222 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 223 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 224 } else if (in[j] < 0) continue; 225 #if defined(PETSC_USE_BOPT_g) 226 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 227 #endif 228 else { 229 if (mat->was_assembled) { 230 if (!aij->colmap) { 231 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 232 } 233 #if defined (PETSC_USE_CTABLE) 234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 235 col--; 236 #else 237 col = aij->colmap[in[j]] - 1; 238 #endif 239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 240 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 241 col = in[j]; 242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 243 B = aij->B; 244 b = (Mat_SeqAIJ*)B->data; 245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 246 ba = b->a; 247 } 248 } else col = in[j]; 249 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 250 if (ignorezeroentries && value == 0.0) continue; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (!aij->donotstash) { 257 if (roworiented) { 258 if (ignorezeroentries && v[i*n] == 0.0) continue; 259 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 260 } else { 261 if (ignorezeroentries && v[i] == 0.0) continue; 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetValues_MPIAIJ" 272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 275 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 276 int cstart = aij->cstart,cend = aij->cend,row,col; 277 278 PetscFunctionBegin; 279 for (i=0; i<m; i++) { 280 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 281 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 282 if (idxm[i] >= rstart && idxm[i] < rend) { 283 row = idxm[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 286 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 287 if (idxn[j] >= cstart && idxn[j] < cend){ 288 col = idxn[j] - cstart; 289 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 290 } else { 291 if (!aij->colmap) { 292 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 293 } 294 #if defined (PETSC_USE_CTABLE) 295 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 296 col --; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 318 int ierr,nstash,reallocs; 319 InsertMode addv; 320 321 PetscFunctionBegin; 322 if (aij->donotstash) { 323 PetscFunctionReturn(0); 324 } 325 326 /* make sure all processors are either in INSERTMODE or ADDMODE */ 327 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 328 if (addv == (ADD_VALUES|INSERT_VALUES)) { 329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 330 } 331 mat->insertmode = addv; /* in case this processor had no cache */ 332 333 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 334 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 335 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 336 PetscFunctionReturn(0); 337 } 338 339 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 343 { 344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 345 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 346 int i,j,rstart,ncols,n,ierr,flg; 347 int *row,*col,other_disassembled; 348 PetscScalar *val; 349 InsertMode addv = mat->insertmode; 350 351 PetscFunctionBegin; 352 if (!aij->donotstash) { 353 while (1) { 354 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 355 if (!flg) break; 356 357 for (i=0; i<n;) { 358 /* Now identify the consecutive vals belonging to the same row */ 359 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 360 if (j < n) ncols = j-i; 361 else ncols = n-i; 362 /* Now assemble all these values with a single function call */ 363 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 364 i = j; 365 } 366 } 367 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 368 } 369 370 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 371 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 372 373 /* determine if any processor has disassembled, if so we must 374 also disassemble ourselfs, in order that we may reassemble. */ 375 /* 376 if nonzero structure of submatrix B cannot change then we know that 377 no processor disassembled thus we can skip this stuff 378 */ 379 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 380 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 381 if (mat->was_assembled && !other_disassembled) { 382 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 383 } 384 } 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 391 392 if (aij->rowvalues) { 393 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 394 aij->rowvalues = 0; 395 } 396 397 /* used by MatAXPY() */ 398 a->xtoy = 0; b->xtoy = 0; 399 a->XtoY = 0; b->XtoY = 0; 400 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 422 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 423 int *nprocs,j,idx,nsends,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 PetscTruth found; 432 433 PetscFunctionBegin; 434 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 435 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 436 437 /* first count number of contributors to each processor */ 438 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 439 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 440 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 441 for (i=0; i<N; i++) { 442 idx = rows[i]; 443 found = PETSC_FALSE; 444 for (j=0; j<size; j++) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 450 } 451 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 452 453 /* inform other processors of number of messages and max length*/ 454 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 455 456 /* post receives: */ 457 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 458 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 459 for (i=0; i<nrecvs; i++) { 460 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 461 } 462 463 /* do sends: 464 1) starts[i] gives the starting index in svalues for stuff going to 465 the ith processor 466 */ 467 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 468 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 469 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 470 starts[0] = 0; 471 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 472 for (i=0; i<N; i++) { 473 svalues[starts[owner[i]]++] = rows[i]; 474 } 475 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 476 477 starts[0] = 0; 478 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 479 count = 0; 480 for (i=0; i<size; i++) { 481 if (nprocs[2*i+1]) { 482 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 483 } 484 } 485 ierr = PetscFree(starts);CHKERRQ(ierr); 486 487 base = owners[rank]; 488 489 /* wait on receives */ 490 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 491 source = lens + nrecvs; 492 count = nrecvs; slen = 0; 493 while (count) { 494 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 495 /* unpack receives into our local space */ 496 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 497 source[imdex] = recv_status.MPI_SOURCE; 498 lens[imdex] = n; 499 slen += n; 500 count--; 501 } 502 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 503 504 /* move the data into the send scatter */ 505 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 506 count = 0; 507 for (i=0; i<nrecvs; i++) { 508 values = rvalues + i*nmax; 509 for (j=0; j<lens[i]; j++) { 510 lrows[count++] = values[j] - base; 511 } 512 } 513 ierr = PetscFree(rvalues);CHKERRQ(ierr); 514 ierr = PetscFree(lens);CHKERRQ(ierr); 515 ierr = PetscFree(owner);CHKERRQ(ierr); 516 ierr = PetscFree(nprocs);CHKERRQ(ierr); 517 518 /* actually zap the local rows */ 519 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 520 PetscLogObjectParent(A,istmp); 521 522 /* 523 Zero the required rows. If the "diagonal block" of the matrix 524 is square and the user wishes to set the diagonal we use seperate 525 code so that MatSetValues() is not called for each diagonal allocating 526 new memory, thus calling lots of mallocs and slowing things down. 527 528 Contributed by: Mathew Knepley 529 */ 530 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 531 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 532 if (diag && (l->A->M == l->A->N)) { 533 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 534 } else if (diag) { 535 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 536 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 537 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 539 } 540 for (i = 0; i < slen; i++) { 541 row = lrows[i] + rstart; 542 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } else { 547 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 548 } 549 ierr = ISDestroy(istmp);CHKERRQ(ierr); 550 ierr = PetscFree(lrows);CHKERRQ(ierr); 551 552 /* wait on sends */ 553 if (nsends) { 554 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 555 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 556 ierr = PetscFree(send_status);CHKERRQ(ierr); 557 } 558 ierr = PetscFree(send_waits);CHKERRQ(ierr); 559 ierr = PetscFree(svalues);CHKERRQ(ierr); 560 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMult_MPIAIJ" 566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 567 { 568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 569 int ierr,nt; 570 571 PetscFunctionBegin; 572 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 573 if (nt != A->n) { 574 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 575 } 576 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 577 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 578 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 579 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultAdd_MPIAIJ" 585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 588 int ierr; 589 590 PetscFunctionBegin; 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 /* do nondiagonal part */ 607 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 608 /* send it on its way */ 609 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 610 /* do local part */ 611 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 612 /* receive remote parts: note this assumes the values are not actually */ 613 /* inserted in yy until the next line, which is true for my implementation*/ 614 /* but is not perhaps always true. */ 615 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 EXTERN_C_BEGIN 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 623 { 624 MPI_Comm comm; 625 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 626 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 627 IS Me,Notme; 628 int M,N,first,last,*notme,ntids,i, ierr; 629 630 PetscFunctionBegin; 631 632 /* Easy test: symmetric diagonal block */ 633 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 634 ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr); 635 if (!*f) PetscFunctionReturn(0); 636 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 637 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 638 if (ntids==1) PetscFunctionReturn(0); 639 640 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 641 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 642 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 643 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 644 for (i=0; i<first; i++) notme[i] = i; 645 for (i=last; i<M; i++) notme[i-last+first] = i; 646 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 647 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 648 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 649 Aoff = Aoffs[0]; 650 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 651 Boff = Boffs[0]; 652 ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 654 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 655 ierr = ISDestroy(Me); CHKERRQ(ierr); 656 ierr = ISDestroy(Notme); CHKERRQ(ierr); 657 658 PetscFunctionReturn(0); 659 } 660 EXTERN_C_END 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 664 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 665 { 666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 667 int ierr; 668 669 PetscFunctionBegin; 670 /* do nondiagonal part */ 671 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 672 /* send it on its way */ 673 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 674 /* do local part */ 675 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 676 /* receive remote parts: note this assumes the values are not actually */ 677 /* inserted in yy until the next line, which is true for my implementation*/ 678 /* but is not perhaps always true. */ 679 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 /* 684 This only works correctly for square matrices where the subblock A->A is the 685 diagonal block 686 */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 690 { 691 int ierr; 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 694 PetscFunctionBegin; 695 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 698 } 699 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatScale_MPIAIJ" 705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 706 { 707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 708 int ierr; 709 710 PetscFunctionBegin; 711 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 712 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatDestroy_MPIAIJ" 718 int MatDestroy_MPIAIJ(Mat mat) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 int ierr; 722 723 PetscFunctionBegin; 724 #if defined(PETSC_USE_LOG) 725 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 726 #endif 727 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 728 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 731 #if defined (PETSC_USE_CTABLE) 732 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 733 #else 734 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 735 #endif 736 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 737 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 738 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 739 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 740 ierr = PetscFree(aij);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 752 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (rank == 0) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->M; 762 header[2] = mat->N; 763 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->m; 768 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 769 for (i=1; i<size; i++) { 770 rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->m; 775 } 776 777 /* load up the local row counts */ 778 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->m; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (rank == 0) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->m; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (rank == 0) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->m; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (rank == 0) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 int ierr,rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,isascii,flg,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (isascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 886 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 888 if (flg) { 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 890 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 891 } else { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 893 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 894 } 895 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 897 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 899 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 900 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } else if (format == PETSC_VIEWER_ASCII_INFO) { 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 905 PetscFunctionReturn(0); 906 } 907 } else if (isbinary) { 908 if (size == 1) { 909 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 910 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 911 } else { 912 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } else if (isdraw) { 916 PetscDraw draw; 917 PetscTruth isnull; 918 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 919 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 920 } 921 922 if (size == 1) { 923 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 924 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 925 } else { 926 /* assemble the entire matrix onto first processor. */ 927 Mat A; 928 Mat_SeqAIJ *Aloc; 929 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 930 PetscScalar *a; 931 932 if (!rank) { 933 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 934 } else { 935 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 936 } 937 PetscLogObjectParent(mat,A); 938 939 /* copy over the A part */ 940 Aloc = (Mat_SeqAIJ*)aij->A->data; 941 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 942 row = aij->rstart; 943 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 944 for (i=0; i<m; i++) { 945 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 946 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 947 } 948 aj = Aloc->j; 949 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 950 951 /* copy over the B part */ 952 Aloc = (Mat_SeqAIJ*)aij->B->data; 953 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = aij->rstart; 955 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 956 ct = cols; 957 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 961 } 962 ierr = PetscFree(ct);CHKERRQ(ierr); 963 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 964 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 /* 966 Everyone has to call to draw the matrix since the graphics waits are 967 synchronized across all processors that share the PetscDraw object 968 */ 969 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 970 if (!rank) { 971 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 972 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 973 } 974 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 975 ierr = MatDestroy(A);CHKERRQ(ierr); 976 } 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatView_MPIAIJ" 982 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 983 { 984 int ierr; 985 PetscTruth isascii,isdraw,issocket,isbinary; 986 987 PetscFunctionBegin; 988 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 989 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 990 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 992 if (isascii || isdraw || isbinary || issocket) { 993 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 994 } else { 995 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 996 } 997 PetscFunctionReturn(0); 998 } 999 1000 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatRelax_MPIAIJ" 1004 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1005 { 1006 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1007 int ierr; 1008 Vec bb1; 1009 PetscScalar mone=-1.0; 1010 1011 PetscFunctionBegin; 1012 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1013 1014 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1015 1016 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1017 if (flag & SOR_ZERO_INITIAL_GUESS) { 1018 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1019 its--; 1020 } 1021 1022 while (its--) { 1023 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1024 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1025 1026 /* update rhs: bb1 = bb - B*x */ 1027 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1028 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1029 1030 /* local sweep */ 1031 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1032 CHKERRQ(ierr); 1033 } 1034 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1035 if (flag & SOR_ZERO_INITIAL_GUESS) { 1036 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1037 its--; 1038 } 1039 while (its--) { 1040 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 1043 /* update rhs: bb1 = bb - B*x */ 1044 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1045 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1046 1047 /* local sweep */ 1048 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1049 CHKERRQ(ierr); 1050 } 1051 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1052 if (flag & SOR_ZERO_INITIAL_GUESS) { 1053 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1054 its--; 1055 } 1056 while (its--) { 1057 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1059 1060 /* update rhs: bb1 = bb - B*x */ 1061 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1063 1064 /* local sweep */ 1065 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1066 CHKERRQ(ierr); 1067 } 1068 } else { 1069 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1070 } 1071 1072 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1078 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1079 { 1080 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1081 Mat A = mat->A,B = mat->B; 1082 int ierr; 1083 PetscReal isend[5],irecv[5]; 1084 1085 PetscFunctionBegin; 1086 info->block_size = 1.0; 1087 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1088 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1089 isend[3] = info->memory; isend[4] = info->mallocs; 1090 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1091 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1092 isend[3] += info->memory; isend[4] += info->mallocs; 1093 if (flag == MAT_LOCAL) { 1094 info->nz_used = isend[0]; 1095 info->nz_allocated = isend[1]; 1096 info->nz_unneeded = isend[2]; 1097 info->memory = isend[3]; 1098 info->mallocs = isend[4]; 1099 } else if (flag == MAT_GLOBAL_MAX) { 1100 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1101 info->nz_used = irecv[0]; 1102 info->nz_allocated = irecv[1]; 1103 info->nz_unneeded = irecv[2]; 1104 info->memory = irecv[3]; 1105 info->mallocs = irecv[4]; 1106 } else if (flag == MAT_GLOBAL_SUM) { 1107 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1108 info->nz_used = irecv[0]; 1109 info->nz_allocated = irecv[1]; 1110 info->nz_unneeded = irecv[2]; 1111 info->memory = irecv[3]; 1112 info->mallocs = irecv[4]; 1113 } 1114 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1115 info->fill_ratio_needed = 0; 1116 info->factor_mallocs = 0; 1117 info->rows_global = (double)matin->M; 1118 info->columns_global = (double)matin->N; 1119 info->rows_local = (double)matin->m; 1120 info->columns_local = (double)matin->N; 1121 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatSetOption_MPIAIJ" 1127 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1128 { 1129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1130 int ierr; 1131 1132 PetscFunctionBegin; 1133 switch (op) { 1134 case MAT_NO_NEW_NONZERO_LOCATIONS: 1135 case MAT_YES_NEW_NONZERO_LOCATIONS: 1136 case MAT_COLUMNS_UNSORTED: 1137 case MAT_COLUMNS_SORTED: 1138 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1139 case MAT_KEEP_ZEROED_ROWS: 1140 case MAT_NEW_NONZERO_LOCATION_ERR: 1141 case MAT_USE_INODES: 1142 case MAT_DO_NOT_USE_INODES: 1143 case MAT_IGNORE_ZERO_ENTRIES: 1144 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1145 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1146 break; 1147 case MAT_ROW_ORIENTED: 1148 a->roworiented = PETSC_TRUE; 1149 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1150 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1151 break; 1152 case MAT_ROWS_SORTED: 1153 case MAT_ROWS_UNSORTED: 1154 case MAT_YES_NEW_DIAGONALS: 1155 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1156 break; 1157 case MAT_COLUMN_ORIENTED: 1158 a->roworiented = PETSC_FALSE; 1159 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1160 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1161 break; 1162 case MAT_IGNORE_OFF_PROC_ENTRIES: 1163 a->donotstash = PETSC_TRUE; 1164 break; 1165 case MAT_NO_NEW_DIAGONALS: 1166 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1167 case MAT_SYMMETRIC: 1168 case MAT_STRUCTURALLY_SYMMETRIC: 1169 case MAT_NOT_SYMMETRIC: 1170 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 case MAT_NOT_HERMITIAN: 1173 case MAT_SYMMETRY_ETERNAL: 1174 case MAT_NOT_SYMMETRY_ETERNAL: 1175 break; 1176 default: 1177 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatGetRow_MPIAIJ" 1184 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1185 { 1186 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1187 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1188 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1189 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1190 int *cmap,*idx_p; 1191 1192 PetscFunctionBegin; 1193 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1194 mat->getrowactive = PETSC_TRUE; 1195 1196 if (!mat->rowvalues && (idx || v)) { 1197 /* 1198 allocate enough space to hold information from the longest row. 1199 */ 1200 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1201 int max = 1,tmp; 1202 for (i=0; i<matin->m; i++) { 1203 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1204 if (max < tmp) { max = tmp; } 1205 } 1206 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1207 mat->rowindices = (int*)(mat->rowvalues + max); 1208 } 1209 1210 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1211 lrow = row - rstart; 1212 1213 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1214 if (!v) {pvA = 0; pvB = 0;} 1215 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1216 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1218 nztot = nzA + nzB; 1219 1220 cmap = mat->garray; 1221 if (v || idx) { 1222 if (nztot) { 1223 /* Sort by increasing column numbers, assuming A and B already sorted */ 1224 int imark = -1; 1225 if (v) { 1226 *v = v_p = mat->rowvalues; 1227 for (i=0; i<nzB; i++) { 1228 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1229 else break; 1230 } 1231 imark = i; 1232 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1233 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1234 } 1235 if (idx) { 1236 *idx = idx_p = mat->rowindices; 1237 if (imark > -1) { 1238 for (i=0; i<imark; i++) { 1239 idx_p[i] = cmap[cworkB[i]]; 1240 } 1241 } else { 1242 for (i=0; i<nzB; i++) { 1243 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1244 else break; 1245 } 1246 imark = i; 1247 } 1248 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1249 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1250 } 1251 } else { 1252 if (idx) *idx = 0; 1253 if (v) *v = 0; 1254 } 1255 } 1256 *nz = nztot; 1257 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1258 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1264 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1265 { 1266 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1267 1268 PetscFunctionBegin; 1269 if (aij->getrowactive == PETSC_FALSE) { 1270 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1271 } 1272 aij->getrowactive = PETSC_FALSE; 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatNorm_MPIAIJ" 1278 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1279 { 1280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1281 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1282 int ierr,i,j,cstart = aij->cstart; 1283 PetscReal sum = 0.0; 1284 PetscScalar *v; 1285 1286 PetscFunctionBegin; 1287 if (aij->size == 1) { 1288 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1289 } else { 1290 if (type == NORM_FROBENIUS) { 1291 v = amat->a; 1292 for (i=0; i<amat->nz; i++) { 1293 #if defined(PETSC_USE_COMPLEX) 1294 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1295 #else 1296 sum += (*v)*(*v); v++; 1297 #endif 1298 } 1299 v = bmat->a; 1300 for (i=0; i<bmat->nz; i++) { 1301 #if defined(PETSC_USE_COMPLEX) 1302 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1303 #else 1304 sum += (*v)*(*v); v++; 1305 #endif 1306 } 1307 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1308 *norm = sqrt(*norm); 1309 } else if (type == NORM_1) { /* max column norm */ 1310 PetscReal *tmp,*tmp2; 1311 int *jj,*garray = aij->garray; 1312 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1313 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1314 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1315 *norm = 0.0; 1316 v = amat->a; jj = amat->j; 1317 for (j=0; j<amat->nz; j++) { 1318 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1319 } 1320 v = bmat->a; jj = bmat->j; 1321 for (j=0; j<bmat->nz; j++) { 1322 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1323 } 1324 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1325 for (j=0; j<mat->N; j++) { 1326 if (tmp2[j] > *norm) *norm = tmp2[j]; 1327 } 1328 ierr = PetscFree(tmp);CHKERRQ(ierr); 1329 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1330 } else if (type == NORM_INFINITY) { /* max row norm */ 1331 PetscReal ntemp = 0.0; 1332 for (j=0; j<aij->A->m; j++) { 1333 v = amat->a + amat->i[j]; 1334 sum = 0.0; 1335 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1336 sum += PetscAbsScalar(*v); v++; 1337 } 1338 v = bmat->a + bmat->i[j]; 1339 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1340 sum += PetscAbsScalar(*v); v++; 1341 } 1342 if (sum > ntemp) ntemp = sum; 1343 } 1344 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1345 } else { 1346 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1347 } 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatTranspose_MPIAIJ" 1354 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1355 { 1356 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1357 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1358 int ierr; 1359 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1360 Mat B; 1361 PetscScalar *array; 1362 1363 PetscFunctionBegin; 1364 if (!matout && M != N) { 1365 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1366 } 1367 1368 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1369 1370 /* copy over the A part */ 1371 Aloc = (Mat_SeqAIJ*)a->A->data; 1372 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1373 row = a->rstart; 1374 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1375 for (i=0; i<m; i++) { 1376 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1377 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1378 } 1379 aj = Aloc->j; 1380 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1381 1382 /* copy over the B part */ 1383 Aloc = (Mat_SeqAIJ*)a->B->data; 1384 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1385 row = a->rstart; 1386 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1387 ct = cols; 1388 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1392 } 1393 ierr = PetscFree(ct);CHKERRQ(ierr); 1394 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1396 if (matout) { 1397 *matout = B; 1398 } else { 1399 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1406 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1407 { 1408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1409 Mat a = aij->A,b = aij->B; 1410 int ierr,s1,s2,s3; 1411 1412 PetscFunctionBegin; 1413 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1414 if (rr) { 1415 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1416 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1417 /* Overlap communication with computation. */ 1418 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1419 } 1420 if (ll) { 1421 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1422 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1423 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1424 } 1425 /* scale the diagonal block */ 1426 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1427 1428 if (rr) { 1429 /* Do a scatter end and then right scale the off-diagonal block */ 1430 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1431 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1432 } 1433 1434 PetscFunctionReturn(0); 1435 } 1436 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1440 int MatPrintHelp_MPIAIJ(Mat A) 1441 { 1442 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1443 int ierr; 1444 1445 PetscFunctionBegin; 1446 if (!a->rank) { 1447 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1454 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1455 { 1456 PetscFunctionBegin; 1457 *bs = 1; 1458 PetscFunctionReturn(0); 1459 } 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1462 int MatSetUnfactored_MPIAIJ(Mat A) 1463 { 1464 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1465 int ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatEqual_MPIAIJ" 1474 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1475 { 1476 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1477 Mat a,b,c,d; 1478 PetscTruth flg; 1479 int ierr; 1480 1481 PetscFunctionBegin; 1482 a = matA->A; b = matA->B; 1483 c = matB->A; d = matB->B; 1484 1485 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1486 if (flg == PETSC_TRUE) { 1487 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1488 } 1489 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatCopy_MPIAIJ" 1495 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1496 { 1497 int ierr; 1498 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1499 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1500 1501 PetscFunctionBegin; 1502 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1503 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1504 /* because of the column compression in the off-processor part of the matrix a->B, 1505 the number of columns in a->B and b->B may be different, hence we cannot call 1506 the MatCopy() directly on the two parts. If need be, we can provide a more 1507 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1508 then copying the submatrices */ 1509 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1510 } else { 1511 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1512 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1513 } 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1519 int MatSetUpPreallocation_MPIAIJ(Mat A) 1520 { 1521 int ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #include "petscblaslapack.h" 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatAXPY_MPIAIJ" 1531 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1532 { 1533 int ierr,one=1,i; 1534 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1535 Mat_SeqAIJ *x,*y; 1536 1537 PetscFunctionBegin; 1538 if (str == SAME_NONZERO_PATTERN) { 1539 x = (Mat_SeqAIJ *)xx->A->data; 1540 y = (Mat_SeqAIJ *)yy->A->data; 1541 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1542 x = (Mat_SeqAIJ *)xx->B->data; 1543 y = (Mat_SeqAIJ *)yy->B->data; 1544 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1545 } else if (str == SUBSET_NONZERO_PATTERN) { 1546 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1547 1548 x = (Mat_SeqAIJ *)xx->B->data; 1549 y = (Mat_SeqAIJ *)yy->B->data; 1550 if (y->xtoy && y->XtoY != xx->B) { 1551 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1552 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1553 } 1554 if (!y->xtoy) { /* get xtoy */ 1555 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1556 y->XtoY = xx->B; 1557 } 1558 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1559 } else { 1560 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* -------------------------------------------------------------------*/ 1566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1567 MatGetRow_MPIAIJ, 1568 MatRestoreRow_MPIAIJ, 1569 MatMult_MPIAIJ, 1570 /* 4*/ MatMultAdd_MPIAIJ, 1571 MatMultTranspose_MPIAIJ, 1572 MatMultTransposeAdd_MPIAIJ, 1573 0, 1574 0, 1575 0, 1576 /*10*/ 0, 1577 0, 1578 0, 1579 MatRelax_MPIAIJ, 1580 MatTranspose_MPIAIJ, 1581 /*15*/ MatGetInfo_MPIAIJ, 1582 MatEqual_MPIAIJ, 1583 MatGetDiagonal_MPIAIJ, 1584 MatDiagonalScale_MPIAIJ, 1585 MatNorm_MPIAIJ, 1586 /*20*/ MatAssemblyBegin_MPIAIJ, 1587 MatAssemblyEnd_MPIAIJ, 1588 0, 1589 MatSetOption_MPIAIJ, 1590 MatZeroEntries_MPIAIJ, 1591 /*25*/ MatZeroRows_MPIAIJ, 1592 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1593 MatLUFactorSymbolic_MPIAIJ_TFS, 1594 #else 1595 0, 1596 #endif 1597 0, 1598 0, 1599 0, 1600 /*30*/ MatSetUpPreallocation_MPIAIJ, 1601 0, 1602 0, 1603 0, 1604 0, 1605 /*35*/ MatDuplicate_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 0, 1610 /*40*/ MatAXPY_MPIAIJ, 1611 MatGetSubMatrices_MPIAIJ, 1612 MatIncreaseOverlap_MPIAIJ, 1613 MatGetValues_MPIAIJ, 1614 MatCopy_MPIAIJ, 1615 /*45*/ MatPrintHelp_MPIAIJ, 1616 MatScale_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 /*50*/ MatGetBlockSize_MPIAIJ, 1621 0, 1622 0, 1623 0, 1624 0, 1625 /*55*/ MatFDColoringCreate_MPIAIJ, 1626 0, 1627 MatSetUnfactored_MPIAIJ, 1628 0, 1629 0, 1630 /*60*/ MatGetSubMatrix_MPIAIJ, 1631 MatDestroy_MPIAIJ, 1632 MatView_MPIAIJ, 1633 MatGetPetscMaps_Petsc, 1634 0, 1635 /*65*/ 0, 1636 0, 1637 0, 1638 0, 1639 0, 1640 /*70*/ 0, 1641 0, 1642 MatSetColoring_MPIAIJ, 1643 MatSetValuesAdic_MPIAIJ, 1644 MatSetValuesAdifor_MPIAIJ, 1645 /*75*/ 0, 1646 0, 1647 0, 1648 0, 1649 0, 1650 /*80*/ 0, 1651 0, 1652 0, 1653 0, 1654 /*85*/ MatLoad_MPIAIJ}; 1655 1656 /* ----------------------------------------------------------------------------------------*/ 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1661 int MatStoreValues_MPIAIJ(Mat mat) 1662 { 1663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1664 int ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 EXTERN_C_END 1672 1673 EXTERN_C_BEGIN 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1676 int MatRetrieveValues_MPIAIJ(Mat mat) 1677 { 1678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1679 int ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 EXTERN_C_END 1687 1688 #include "petscpc.h" 1689 EXTERN_C_BEGIN 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1692 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1693 { 1694 Mat_MPIAIJ *b; 1695 int ierr,i; 1696 1697 PetscFunctionBegin; 1698 B->preallocated = PETSC_TRUE; 1699 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1700 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1701 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1702 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1703 if (d_nnz) { 1704 for (i=0; i<B->m; i++) { 1705 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1706 } 1707 } 1708 if (o_nnz) { 1709 for (i=0; i<B->m; i++) { 1710 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1711 } 1712 } 1713 b = (Mat_MPIAIJ*)B->data; 1714 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1715 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1716 1717 PetscFunctionReturn(0); 1718 } 1719 EXTERN_C_END 1720 1721 /*MC 1722 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1723 1724 Options Database Keys: 1725 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1726 1727 Level: beginner 1728 1729 .seealso: MatCreateMPIAIJ 1730 M*/ 1731 1732 EXTERN_C_BEGIN 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatCreate_MPIAIJ" 1735 int MatCreate_MPIAIJ(Mat B) 1736 { 1737 Mat_MPIAIJ *b; 1738 int ierr,i,size; 1739 1740 PetscFunctionBegin; 1741 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1742 1743 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1744 B->data = (void*)b; 1745 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1746 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1747 B->factor = 0; 1748 B->assembled = PETSC_FALSE; 1749 B->mapping = 0; 1750 1751 B->insertmode = NOT_SET_VALUES; 1752 b->size = size; 1753 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1754 1755 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1756 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1757 1758 /* the information in the maps duplicates the information computed below, eventually 1759 we should remove the duplicate information that is not contained in the maps */ 1760 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1761 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1762 1763 /* build local table of row and column ownerships */ 1764 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1765 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1766 b->cowners = b->rowners + b->size + 2; 1767 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1768 b->rowners[0] = 0; 1769 for (i=2; i<=b->size; i++) { 1770 b->rowners[i] += b->rowners[i-1]; 1771 } 1772 b->rstart = b->rowners[b->rank]; 1773 b->rend = b->rowners[b->rank+1]; 1774 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1775 b->cowners[0] = 0; 1776 for (i=2; i<=b->size; i++) { 1777 b->cowners[i] += b->cowners[i-1]; 1778 } 1779 b->cstart = b->cowners[b->rank]; 1780 b->cend = b->cowners[b->rank+1]; 1781 1782 /* build cache for off array entries formed */ 1783 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1784 b->donotstash = PETSC_FALSE; 1785 b->colmap = 0; 1786 b->garray = 0; 1787 b->roworiented = PETSC_TRUE; 1788 1789 /* stuff used for matrix vector multiply */ 1790 b->lvec = PETSC_NULL; 1791 b->Mvctx = PETSC_NULL; 1792 1793 /* stuff for MatGetRow() */ 1794 b->rowindices = 0; 1795 b->rowvalues = 0; 1796 b->getrowactive = PETSC_FALSE; 1797 1798 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1799 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1800 PetscLogObjectParent(B,b->A); 1801 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1802 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1803 PetscLogObjectParent(B,b->B); 1804 1805 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1806 "MatStoreValues_MPIAIJ", 1807 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1808 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1809 "MatRetrieveValues_MPIAIJ", 1810 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1812 "MatGetDiagonalBlock_MPIAIJ", 1813 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1814 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1815 "MatIsTranspose_MPIAIJ", 1816 MatIsTranspose_MPIAIJ); CHKERRQ(ierr); 1817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1818 "MatMPIAIJSetPreallocation_MPIAIJ", 1819 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1821 "MatDiagonalScaleLocal_MPIAIJ", 1822 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 EXTERN_C_END 1826 1827 /*MC 1828 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1829 1830 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1831 and MATMPIAIJ otherwise. 1832 1833 Options Database Keys: 1834 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1835 1836 Level: beginner 1837 1838 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1839 M*/ 1840 1841 EXTERN_C_BEGIN 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatCreate_AIJ" 1844 int MatCreate_AIJ(Mat A) { 1845 int ierr,size; 1846 1847 PetscFunctionBegin; 1848 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1849 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1850 if (size == 1) { 1851 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 EXTERN_C_END 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1861 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1862 { 1863 Mat mat; 1864 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1865 int ierr; 1866 1867 PetscFunctionBegin; 1868 *newmat = 0; 1869 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1870 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1871 a = (Mat_MPIAIJ*)mat->data; 1872 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1873 mat->factor = matin->factor; 1874 mat->assembled = PETSC_TRUE; 1875 mat->insertmode = NOT_SET_VALUES; 1876 mat->preallocated = PETSC_TRUE; 1877 1878 a->rstart = oldmat->rstart; 1879 a->rend = oldmat->rend; 1880 a->cstart = oldmat->cstart; 1881 a->cend = oldmat->cend; 1882 a->size = oldmat->size; 1883 a->rank = oldmat->rank; 1884 a->donotstash = oldmat->donotstash; 1885 a->roworiented = oldmat->roworiented; 1886 a->rowindices = 0; 1887 a->rowvalues = 0; 1888 a->getrowactive = PETSC_FALSE; 1889 1890 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1891 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1892 if (oldmat->colmap) { 1893 #if defined (PETSC_USE_CTABLE) 1894 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1895 #else 1896 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1897 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1898 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1899 #endif 1900 } else a->colmap = 0; 1901 if (oldmat->garray) { 1902 int len; 1903 len = oldmat->B->n; 1904 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1905 PetscLogObjectMemory(mat,len*sizeof(int)); 1906 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1907 } else a->garray = 0; 1908 1909 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1910 PetscLogObjectParent(mat,a->lvec); 1911 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1912 PetscLogObjectParent(mat,a->Mvctx); 1913 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1914 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1915 PetscLogObjectParent(mat,a->A); 1916 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1917 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1918 PetscLogObjectParent(mat,a->B); 1919 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1920 *newmat = mat; 1921 PetscFunctionReturn(0); 1922 } 1923 1924 #include "petscsys.h" 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatLoad_MPIAIJ" 1928 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1929 { 1930 Mat A; 1931 PetscScalar *vals,*svals; 1932 MPI_Comm comm = ((PetscObject)viewer)->comm; 1933 MPI_Status status; 1934 int i,nz,ierr,j,rstart,rend,fd; 1935 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1936 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1937 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1938 1939 PetscFunctionBegin; 1940 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1941 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1942 if (!rank) { 1943 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1944 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1945 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1946 if (header[3] < 0) { 1947 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1948 } 1949 } 1950 1951 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1952 M = header[1]; N = header[2]; 1953 /* determine ownership of all rows */ 1954 m = M/size + ((M % size) > rank); 1955 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1956 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1957 rowners[0] = 0; 1958 for (i=2; i<=size; i++) { 1959 rowners[i] += rowners[i-1]; 1960 } 1961 rstart = rowners[rank]; 1962 rend = rowners[rank+1]; 1963 1964 /* distribute row lengths to all processors */ 1965 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1966 offlens = ourlens + (rend-rstart); 1967 if (!rank) { 1968 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1969 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1970 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1971 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1972 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1973 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1974 } else { 1975 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1976 } 1977 1978 if (!rank) { 1979 /* calculate the number of nonzeros on each processor */ 1980 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1981 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1982 for (i=0; i<size; i++) { 1983 for (j=rowners[i]; j< rowners[i+1]; j++) { 1984 procsnz[i] += rowlengths[j]; 1985 } 1986 } 1987 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1988 1989 /* determine max buffer needed and allocate it */ 1990 maxnz = 0; 1991 for (i=0; i<size; i++) { 1992 maxnz = PetscMax(maxnz,procsnz[i]); 1993 } 1994 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1995 1996 /* read in my part of the matrix column indices */ 1997 nz = procsnz[0]; 1998 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1999 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2000 2001 /* read in every one elses and ship off */ 2002 for (i=1; i<size; i++) { 2003 nz = procsnz[i]; 2004 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2005 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2006 } 2007 ierr = PetscFree(cols);CHKERRQ(ierr); 2008 } else { 2009 /* determine buffer space needed for message */ 2010 nz = 0; 2011 for (i=0; i<m; i++) { 2012 nz += ourlens[i]; 2013 } 2014 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2015 2016 /* receive message of column indices*/ 2017 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2018 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2019 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2020 } 2021 2022 /* determine column ownership if matrix is not square */ 2023 if (N != M) { 2024 n = N/size + ((N % size) > rank); 2025 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2026 cstart = cend - n; 2027 } else { 2028 cstart = rstart; 2029 cend = rend; 2030 n = cend - cstart; 2031 } 2032 2033 /* loop over local rows, determining number of off diagonal entries */ 2034 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2035 jj = 0; 2036 for (i=0; i<m; i++) { 2037 for (j=0; j<ourlens[i]; j++) { 2038 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2039 jj++; 2040 } 2041 } 2042 2043 /* create our matrix */ 2044 for (i=0; i<m; i++) { 2045 ourlens[i] -= offlens[i]; 2046 } 2047 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2048 ierr = MatSetType(A,type);CHKERRQ(ierr); 2049 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2050 2051 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2052 for (i=0; i<m; i++) { 2053 ourlens[i] += offlens[i]; 2054 } 2055 2056 if (!rank) { 2057 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2058 2059 /* read in my part of the matrix numerical values */ 2060 nz = procsnz[0]; 2061 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2062 2063 /* insert into matrix */ 2064 jj = rstart; 2065 smycols = mycols; 2066 svals = vals; 2067 for (i=0; i<m; i++) { 2068 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2069 smycols += ourlens[i]; 2070 svals += ourlens[i]; 2071 jj++; 2072 } 2073 2074 /* read in other processors and ship out */ 2075 for (i=1; i<size; i++) { 2076 nz = procsnz[i]; 2077 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2078 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2079 } 2080 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2081 } else { 2082 /* receive numeric values */ 2083 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2084 2085 /* receive message of values*/ 2086 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2087 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2088 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2089 2090 /* insert into matrix */ 2091 jj = rstart; 2092 smycols = mycols; 2093 svals = vals; 2094 for (i=0; i<m; i++) { 2095 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2096 smycols += ourlens[i]; 2097 svals += ourlens[i]; 2098 jj++; 2099 } 2100 } 2101 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2102 ierr = PetscFree(vals);CHKERRQ(ierr); 2103 ierr = PetscFree(mycols);CHKERRQ(ierr); 2104 ierr = PetscFree(rowners);CHKERRQ(ierr); 2105 2106 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2107 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2108 *newmat = A; 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2114 /* 2115 Not great since it makes two copies of the submatrix, first an SeqAIJ 2116 in local and then by concatenating the local matrices the end result. 2117 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2118 */ 2119 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2120 { 2121 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2122 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2123 Mat *local,M,Mreuse; 2124 PetscScalar *vwork,*aa; 2125 MPI_Comm comm = mat->comm; 2126 Mat_SeqAIJ *aij; 2127 2128 2129 PetscFunctionBegin; 2130 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2131 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2132 2133 if (call == MAT_REUSE_MATRIX) { 2134 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2135 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2136 local = &Mreuse; 2137 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2138 } else { 2139 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2140 Mreuse = *local; 2141 ierr = PetscFree(local);CHKERRQ(ierr); 2142 } 2143 2144 /* 2145 m - number of local rows 2146 n - number of columns (same on all processors) 2147 rstart - first row in new global matrix generated 2148 */ 2149 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2150 if (call == MAT_INITIAL_MATRIX) { 2151 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2152 ii = aij->i; 2153 jj = aij->j; 2154 2155 /* 2156 Determine the number of non-zeros in the diagonal and off-diagonal 2157 portions of the matrix in order to do correct preallocation 2158 */ 2159 2160 /* first get start and end of "diagonal" columns */ 2161 if (csize == PETSC_DECIDE) { 2162 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2163 if (mglobal == n) { /* square matrix */ 2164 nlocal = m; 2165 } else { 2166 nlocal = n/size + ((n % size) > rank); 2167 } 2168 } else { 2169 nlocal = csize; 2170 } 2171 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2172 rstart = rend - nlocal; 2173 if (rank == size - 1 && rend != n) { 2174 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2175 } 2176 2177 /* next, compute all the lengths */ 2178 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2179 olens = dlens + m; 2180 for (i=0; i<m; i++) { 2181 jend = ii[i+1] - ii[i]; 2182 olen = 0; 2183 dlen = 0; 2184 for (j=0; j<jend; j++) { 2185 if (*jj < rstart || *jj >= rend) olen++; 2186 else dlen++; 2187 jj++; 2188 } 2189 olens[i] = olen; 2190 dlens[i] = dlen; 2191 } 2192 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2193 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2194 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2195 ierr = PetscFree(dlens);CHKERRQ(ierr); 2196 } else { 2197 int ml,nl; 2198 2199 M = *newmat; 2200 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2201 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2202 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2203 /* 2204 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2205 rather than the slower MatSetValues(). 2206 */ 2207 M->was_assembled = PETSC_TRUE; 2208 M->assembled = PETSC_FALSE; 2209 } 2210 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2211 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2212 ii = aij->i; 2213 jj = aij->j; 2214 aa = aij->a; 2215 for (i=0; i<m; i++) { 2216 row = rstart + i; 2217 nz = ii[i+1] - ii[i]; 2218 cwork = jj; jj += nz; 2219 vwork = aa; aa += nz; 2220 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2221 } 2222 2223 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2224 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2225 *newmat = M; 2226 2227 /* save submatrix used in processor for next request */ 2228 if (call == MAT_INITIAL_MATRIX) { 2229 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2230 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2231 } 2232 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2238 /*@C 2239 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2240 (the default parallel PETSc format). For good matrix assembly performance 2241 the user should preallocate the matrix storage by setting the parameters 2242 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2243 performance can be increased by more than a factor of 50. 2244 2245 Collective on MPI_Comm 2246 2247 Input Parameters: 2248 + A - the matrix 2249 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2250 (same value is used for all local rows) 2251 . d_nnz - array containing the number of nonzeros in the various rows of the 2252 DIAGONAL portion of the local submatrix (possibly different for each row) 2253 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2254 The size of this array is equal to the number of local rows, i.e 'm'. 2255 You must leave room for the diagonal entry even if it is zero. 2256 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2257 submatrix (same value is used for all local rows). 2258 - o_nnz - array containing the number of nonzeros in the various rows of the 2259 OFF-DIAGONAL portion of the local submatrix (possibly different for 2260 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2261 structure. The size of this array is equal to the number 2262 of local rows, i.e 'm'. 2263 2264 The AIJ format (also called the Yale sparse matrix format or 2265 compressed row storage), is fully compatible with standard Fortran 77 2266 storage. That is, the stored row and column indices can begin at 2267 either one (as in Fortran) or zero. See the users manual for details. 2268 2269 The user MUST specify either the local or global matrix dimensions 2270 (possibly both). 2271 2272 The parallel matrix is partitioned such that the first m0 rows belong to 2273 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2274 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2275 2276 The DIAGONAL portion of the local submatrix of a processor can be defined 2277 as the submatrix which is obtained by extraction the part corresponding 2278 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2279 first row that belongs to the processor, and r2 is the last row belonging 2280 to the this processor. This is a square mxm matrix. The remaining portion 2281 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2282 2283 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2284 2285 By default, this format uses inodes (identical nodes) when possible. 2286 We search for consecutive rows with the same nonzero structure, thereby 2287 reusing matrix information to achieve increased efficiency. 2288 2289 Options Database Keys: 2290 + -mat_aij_no_inode - Do not use inodes 2291 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2292 - -mat_aij_oneindex - Internally use indexing starting at 1 2293 rather than 0. Note that when calling MatSetValues(), 2294 the user still MUST index entries starting at 0! 2295 2296 Example usage: 2297 2298 Consider the following 8x8 matrix with 34 non-zero values, that is 2299 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2300 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2301 as follows: 2302 2303 .vb 2304 1 2 0 | 0 3 0 | 0 4 2305 Proc0 0 5 6 | 7 0 0 | 8 0 2306 9 0 10 | 11 0 0 | 12 0 2307 ------------------------------------- 2308 13 0 14 | 15 16 17 | 0 0 2309 Proc1 0 18 0 | 19 20 21 | 0 0 2310 0 0 0 | 22 23 0 | 24 0 2311 ------------------------------------- 2312 Proc2 25 26 27 | 0 0 28 | 29 0 2313 30 0 0 | 31 32 33 | 0 34 2314 .ve 2315 2316 This can be represented as a collection of submatrices as: 2317 2318 .vb 2319 A B C 2320 D E F 2321 G H I 2322 .ve 2323 2324 Where the submatrices A,B,C are owned by proc0, D,E,F are 2325 owned by proc1, G,H,I are owned by proc2. 2326 2327 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2328 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2329 The 'M','N' parameters are 8,8, and have the same values on all procs. 2330 2331 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2332 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2333 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2334 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2335 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2336 matrix, ans [DF] as another SeqAIJ matrix. 2337 2338 When d_nz, o_nz parameters are specified, d_nz storage elements are 2339 allocated for every row of the local diagonal submatrix, and o_nz 2340 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2341 One way to choose d_nz and o_nz is to use the max nonzerors per local 2342 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2343 In this case, the values of d_nz,o_nz are: 2344 .vb 2345 proc0 : dnz = 2, o_nz = 2 2346 proc1 : dnz = 3, o_nz = 2 2347 proc2 : dnz = 1, o_nz = 4 2348 .ve 2349 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2350 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2351 for proc3. i.e we are using 12+15+10=37 storage locations to store 2352 34 values. 2353 2354 When d_nnz, o_nnz parameters are specified, the storage is specified 2355 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2356 In the above case the values for d_nnz,o_nnz are: 2357 .vb 2358 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2359 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2360 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2361 .ve 2362 Here the space allocated is sum of all the above values i.e 34, and 2363 hence pre-allocation is perfect. 2364 2365 Level: intermediate 2366 2367 .keywords: matrix, aij, compressed row, sparse, parallel 2368 2369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2370 @*/ 2371 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2372 { 2373 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2374 2375 PetscFunctionBegin; 2376 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2377 if (f) { 2378 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatCreateMPIAIJ" 2385 /*@C 2386 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2387 (the default parallel PETSc format). For good matrix assembly performance 2388 the user should preallocate the matrix storage by setting the parameters 2389 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2390 performance can be increased by more than a factor of 50. 2391 2392 Collective on MPI_Comm 2393 2394 Input Parameters: 2395 + comm - MPI communicator 2396 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2397 This value should be the same as the local size used in creating the 2398 y vector for the matrix-vector product y = Ax. 2399 . n - This value should be the same as the local size used in creating the 2400 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2401 calculated if N is given) For square matrices n is almost always m. 2402 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2403 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2404 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2405 (same value is used for all local rows) 2406 . d_nnz - array containing the number of nonzeros in the various rows of the 2407 DIAGONAL portion of the local submatrix (possibly different for each row) 2408 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2409 The size of this array is equal to the number of local rows, i.e 'm'. 2410 You must leave room for the diagonal entry even if it is zero. 2411 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2412 submatrix (same value is used for all local rows). 2413 - o_nnz - array containing the number of nonzeros in the various rows of the 2414 OFF-DIAGONAL portion of the local submatrix (possibly different for 2415 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2416 structure. The size of this array is equal to the number 2417 of local rows, i.e 'm'. 2418 2419 Output Parameter: 2420 . A - the matrix 2421 2422 Notes: 2423 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2424 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2425 storage requirements for this matrix. 2426 2427 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2428 processor than it must be used on all processors that share the object for 2429 that argument. 2430 2431 The AIJ format (also called the Yale sparse matrix format or 2432 compressed row storage), is fully compatible with standard Fortran 77 2433 storage. That is, the stored row and column indices can begin at 2434 either one (as in Fortran) or zero. See the users manual for details. 2435 2436 The user MUST specify either the local or global matrix dimensions 2437 (possibly both). 2438 2439 The parallel matrix is partitioned such that the first m0 rows belong to 2440 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2441 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2442 2443 The DIAGONAL portion of the local submatrix of a processor can be defined 2444 as the submatrix which is obtained by extraction the part corresponding 2445 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2446 first row that belongs to the processor, and r2 is the last row belonging 2447 to the this processor. This is a square mxm matrix. The remaining portion 2448 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2449 2450 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2451 2452 When calling this routine with a single process communicator, a matrix of 2453 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2454 type of communicator, use the construction mechanism: 2455 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2456 2457 By default, this format uses inodes (identical nodes) when possible. 2458 We search for consecutive rows with the same nonzero structure, thereby 2459 reusing matrix information to achieve increased efficiency. 2460 2461 Options Database Keys: 2462 + -mat_aij_no_inode - Do not use inodes 2463 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2464 - -mat_aij_oneindex - Internally use indexing starting at 1 2465 rather than 0. Note that when calling MatSetValues(), 2466 the user still MUST index entries starting at 0! 2467 2468 2469 Example usage: 2470 2471 Consider the following 8x8 matrix with 34 non-zero values, that is 2472 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2473 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2474 as follows: 2475 2476 .vb 2477 1 2 0 | 0 3 0 | 0 4 2478 Proc0 0 5 6 | 7 0 0 | 8 0 2479 9 0 10 | 11 0 0 | 12 0 2480 ------------------------------------- 2481 13 0 14 | 15 16 17 | 0 0 2482 Proc1 0 18 0 | 19 20 21 | 0 0 2483 0 0 0 | 22 23 0 | 24 0 2484 ------------------------------------- 2485 Proc2 25 26 27 | 0 0 28 | 29 0 2486 30 0 0 | 31 32 33 | 0 34 2487 .ve 2488 2489 This can be represented as a collection of submatrices as: 2490 2491 .vb 2492 A B C 2493 D E F 2494 G H I 2495 .ve 2496 2497 Where the submatrices A,B,C are owned by proc0, D,E,F are 2498 owned by proc1, G,H,I are owned by proc2. 2499 2500 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2501 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2502 The 'M','N' parameters are 8,8, and have the same values on all procs. 2503 2504 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2505 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2506 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2507 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2508 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2509 matrix, ans [DF] as another SeqAIJ matrix. 2510 2511 When d_nz, o_nz parameters are specified, d_nz storage elements are 2512 allocated for every row of the local diagonal submatrix, and o_nz 2513 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2514 One way to choose d_nz and o_nz is to use the max nonzerors per local 2515 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2516 In this case, the values of d_nz,o_nz are: 2517 .vb 2518 proc0 : dnz = 2, o_nz = 2 2519 proc1 : dnz = 3, o_nz = 2 2520 proc2 : dnz = 1, o_nz = 4 2521 .ve 2522 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2523 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2524 for proc3. i.e we are using 12+15+10=37 storage locations to store 2525 34 values. 2526 2527 When d_nnz, o_nnz parameters are specified, the storage is specified 2528 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2529 In the above case the values for d_nnz,o_nnz are: 2530 .vb 2531 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2532 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2533 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2534 .ve 2535 Here the space allocated is sum of all the above values i.e 34, and 2536 hence pre-allocation is perfect. 2537 2538 Level: intermediate 2539 2540 .keywords: matrix, aij, compressed row, sparse, parallel 2541 2542 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2543 @*/ 2544 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2545 { 2546 int ierr,size; 2547 2548 PetscFunctionBegin; 2549 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2550 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2551 if (size > 1) { 2552 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2553 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2554 } else { 2555 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2556 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2563 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2566 PetscFunctionBegin; 2567 *Ad = a->A; 2568 *Ao = a->B; 2569 *colmap = a->garray; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 #undef __FUNCT__ 2574 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2575 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2576 { 2577 int ierr,i; 2578 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2579 2580 PetscFunctionBegin; 2581 if (coloring->ctype == IS_COLORING_LOCAL) { 2582 ISColoringValue *allcolors,*colors; 2583 ISColoring ocoloring; 2584 2585 /* set coloring for diagonal portion */ 2586 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2587 2588 /* set coloring for off-diagonal portion */ 2589 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2590 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2591 for (i=0; i<a->B->n; i++) { 2592 colors[i] = allcolors[a->garray[i]]; 2593 } 2594 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2595 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2596 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2597 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2598 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2599 ISColoringValue *colors; 2600 int *larray; 2601 ISColoring ocoloring; 2602 2603 /* set coloring for diagonal portion */ 2604 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2605 for (i=0; i<a->A->n; i++) { 2606 larray[i] = i + a->cstart; 2607 } 2608 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2609 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2610 for (i=0; i<a->A->n; i++) { 2611 colors[i] = coloring->colors[larray[i]]; 2612 } 2613 ierr = PetscFree(larray);CHKERRQ(ierr); 2614 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2615 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2616 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2617 2618 /* set coloring for off-diagonal portion */ 2619 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2620 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2621 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2622 for (i=0; i<a->B->n; i++) { 2623 colors[i] = coloring->colors[larray[i]]; 2624 } 2625 ierr = PetscFree(larray);CHKERRQ(ierr); 2626 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2627 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2628 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2629 } else { 2630 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2631 } 2632 2633 PetscFunctionReturn(0); 2634 } 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2638 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2639 { 2640 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2641 int ierr; 2642 2643 PetscFunctionBegin; 2644 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2645 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2646 PetscFunctionReturn(0); 2647 } 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2651 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2652 { 2653 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2654 int ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2658 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatMerge" 2664 /*@C 2665 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2666 matrices from each processor 2667 2668 Collective on MPI_Comm 2669 2670 Input Parameters: 2671 + comm - the communicators the parallel matrix will live on 2672 - inmat - the input sequential matrices 2673 2674 Output Parameter: 2675 . outmat - the parallel matrix generated 2676 2677 Level: advanced 2678 2679 Notes: The number of columns of the matrix in EACH of the seperate files 2680 MUST be the same. 2681 2682 @*/ 2683 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2684 { 2685 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2686 PetscScalar *values; 2687 PetscMap columnmap,rowmap; 2688 2689 PetscFunctionBegin; 2690 2691 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2692 2693 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2694 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2695 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2696 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2697 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2698 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2699 2700 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2701 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2702 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2703 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2704 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2705 2706 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2707 for (i=0;i<m;i++) { 2708 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2709 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2710 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2711 } 2712 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2713 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2714 2715 for (i=0;i<m;i++) { 2716 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2717 I = i + rstart; 2718 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2719 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2720 } 2721 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2722 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2723 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2724 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatFileSplit" 2730 int MatFileSplit(Mat A,char *outfile) 2731 { 2732 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2733 PetscViewer out; 2734 char *name; 2735 Mat B; 2736 PetscScalar *values; 2737 2738 PetscFunctionBegin; 2739 2740 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2741 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2742 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2743 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2744 for (i=0;i<m;i++) { 2745 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2746 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2747 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2748 } 2749 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2751 2752 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2753 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2754 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2755 sprintf(name,"%s.%d",outfile,rank); 2756 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2757 ierr = PetscFree(name); 2758 ierr = MatView(B,out);CHKERRQ(ierr); 2759 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2760 ierr = MatDestroy(B);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763