1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 int n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 
61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + 
bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = 
PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 int i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 
row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode 
MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 int i,j,rstart = aij->rstart,rend = aij->rend; 278 int cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 int nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 int i,j,rstart,ncols,n,flg; 351 int *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. 
*/ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 } 389 390 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 391 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 395 396 if (aij->rowvalues) { 397 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 398 aij->rowvalues = 0; 399 } 400 401 /* used by MatAXPY() */ 402 a->xtoy = 0; b->xtoy = 0; 403 a->XtoY = 0; b->XtoY = 0; 404 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 PetscErrorCode ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatZeroRows_MPIAIJ" 423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 PetscErrorCode ierr; 427 int i,N,*rows,*owners = l->rowners,size = l->size; 428 int *nprocs,j,idx,nsends,row; 429 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 430 int *rvalues,tag = A->tag,count,base,slen,n,*source; 431 int *lens,imdex,*lrows,*values,rstart=l->rstart; 432 MPI_Comm comm = A->comm; 433 MPI_Request *send_waits,*recv_waits; 434 MPI_Status recv_status,*send_status; 435 IS istmp; 436 PetscTruth found; 437 438 PetscFunctionBegin; 439 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 441 442 
/* first count number of contributors to each processor */ 443 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 444 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 445 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 446 for (i=0; i<N; i++) { 447 idx = rows[i]; 448 found = PETSC_FALSE; 449 for (j=0; j<size; j++) { 450 if (idx >= owners[j] && idx < owners[j+1]) { 451 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 452 } 453 } 454 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 455 } 456 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 457 458 /* inform other processors of number of messages and max length*/ 459 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 460 461 /* post receives: */ 462 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 463 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 464 for (i=0; i<nrecvs; i++) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 474 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 475 starts[0] = 0; 476 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 477 for (i=0; i<N; i++) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 481 482 starts[0] = 0; 483 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 484 count = 0; 485 for (i=0; i<size; i++) { 486 if (nprocs[2*i+1]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = 
PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 511 count = 0; 512 for (i=0; i<nrecvs; i++) { 513 values = rvalues + i*nmax; 514 for (j=0; j<lens[i]; j++) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PetscLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for (i = 0; i < slen; i++) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatMult_MPIAIJ" 571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 574 PetscErrorCode ierr; 575 int nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != A->n) { 580 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMultAdd_MPIAIJ" 591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 PetscErrorCode ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. 
*/ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 629 { 630 MPI_Comm comm; 631 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 632 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 633 IS Me,Notme; 634 PetscErrorCode ierr; 635 int M,N,first,last,*notme,ntids,i; 636 637 PetscFunctionBegin; 638 639 /* Easy test: symmetric diagonal block */ 640 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 641 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 642 if (!*f) PetscFunctionReturn(0); 643 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 644 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 645 if (ntids==1) PetscFunctionReturn(0); 646 647 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 648 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 650 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 651 for (i=0; i<first; i++) notme[i] = i; 652 for (i=last; i<M; i++) notme[i-last+first] = i; 653 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 654 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 655 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 656 Aoff = Aoffs[0]; 657 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 658 Boff = Boffs[0]; 659 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 660 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 661 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 662 ierr = ISDestroy(Me);CHKERRQ(ierr); 663 ierr = ISDestroy(Notme);CHKERRQ(ierr); 664 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 669 #undef __FUNCT__ 670 #define __FUNCT__ 
"MatMultTransposeAdd_MPIAIJ" 671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 674 PetscErrorCode ierr; 675 676 PetscFunctionBegin; 677 /* do nondiagonal part */ 678 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 679 /* send it on its way */ 680 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 /* do local part */ 682 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 683 /* receive remote parts: note this assumes the values are not actually */ 684 /* inserted in yy until the next line, which is true for my implementation*/ 685 /* but is not perhaps always true. */ 686 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 This only works correctly for square matrices where the subblock A->A is the 692 diagonal block 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 697 { 698 PetscErrorCode ierr; 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 701 PetscFunctionBegin; 702 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 703 if (a->rstart != a->cstart || a->rend != a->cend) { 704 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 705 } 706 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatScale_MPIAIJ" 712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 719 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatDestroy_MPIAIJ" 725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 726 { 727 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 #if defined(PETSC_USE_LOG) 732 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 733 #endif 734 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 735 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 736 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 737 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 738 #if defined (PETSC_USE_CTABLE) 739 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 740 #else 741 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 742 #endif 743 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 744 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 745 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 746 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 747 ierr = PetscFree(aij);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatView_MPIAIJ_Binary" 753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 754 { 755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 756 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 757 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 758 PetscErrorCode ierr; 759 int nz,fd,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 760 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 761 PetscScalar *column_values; 762 763 PetscFunctionBegin; 764 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 765 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 766 nz = A->nz + B->nz; 767 if (!rank) { 768 header[0] = MAT_FILE_COOKIE; 769 header[1] = mat->M; 770 header[2] = mat->N; 771 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 772 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 773 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 774 /* get largest number of rows 
any processor has */ 775 rlen = mat->m; 776 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 777 for (i=1; i<size; i++) { 778 rlen = PetscMax(rlen,range[i+1] - range[i]); 779 } 780 } else { 781 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 782 rlen = mat->m; 783 } 784 785 /* load up the local row counts */ 786 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 787 for (i=0; i<mat->m; i++) { 788 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 789 } 790 791 /* store the row lengths to the file */ 792 if (!rank) { 793 MPI_Status status; 794 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 795 for (i=1; i<size; i++) { 796 rlen = range[i+1] - range[i]; 797 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 798 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 799 } 800 } else { 801 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 802 } 803 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 804 805 /* load up the local column indices */ 806 nzmax = nz; /* )th processor needs space a largest processor needs */ 807 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 808 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 809 cnt = 0; 810 for (i=0; i<mat->m; i++) { 811 for (j=B->i[i]; j<B->i[i+1]; j++) { 812 if ( (col = garray[B->j[j]]) > cstart) break; 813 column_indices[cnt++] = col; 814 } 815 for (k=A->i[i]; k<A->i[i+1]; k++) { 816 column_indices[cnt++] = A->j[k] + cstart; 817 } 818 for (; j<B->i[i+1]; j++) { 819 column_indices[cnt++] = garray[B->j[j]]; 820 } 821 } 822 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 823 824 /* store the column indices to the file */ 825 if (!rank) { 826 MPI_Status status; 827 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 828 for (i=1; i<size; i++) 
{ 829 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 830 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 831 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 832 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 833 } 834 } else { 835 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 836 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(column_indices);CHKERRQ(ierr); 839 840 /* load up the local column values */ 841 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 842 cnt = 0; 843 for (i=0; i<mat->m; i++) { 844 for (j=B->i[i]; j<B->i[i+1]; j++) { 845 if ( garray[B->j[j]] > cstart) break; 846 column_values[cnt++] = B->a[j]; 847 } 848 for (k=A->i[i]; k<A->i[i+1]; k++) { 849 column_values[cnt++] = A->a[k]; 850 } 851 for (; j<B->i[i+1]; j++) { 852 column_values[cnt++] = B->a[j]; 853 } 854 } 855 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 856 857 /* store the column values to the file */ 858 if (!rank) { 859 MPI_Status status; 860 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 861 for (i=1; i<size; i++) { 862 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 863 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 864 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 865 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 866 } 867 } else { 868 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 869 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 870 } 871 ierr = PetscFree(column_values);CHKERRQ(ierr); 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ 
"MatView_MPIAIJ_ASCIIorDraworSocket" 877 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 878 { 879 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 880 PetscErrorCode ierr; 881 int rank = aij->rank,size = aij->size; 882 PetscTruth isdraw,iascii,flg,isbinary; 883 PetscViewer sviewer; 884 PetscViewerFormat format; 885 886 PetscFunctionBegin; 887 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 888 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 889 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 890 if (iascii) { 891 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 892 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 893 MatInfo info; 894 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 895 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 897 if (flg) { 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 899 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 900 } else { 901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 902 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 903 } 904 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 905 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 906 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 907 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 908 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 909 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } else if (format == 
PETSC_VIEWER_ASCII_INFO) { 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 if (!rank) { 942 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 943 } else { 944 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 PetscLogObjectParent(mat,A); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = aij->rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = 
aij->rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 #undef __FUNCT__ 1015 
#define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 PetscScalar mone=-1.0; 1022 1023 PetscFunctionBegin; 1024 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1025 1026 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1027 1028 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1031 its--; 1032 } 1033 1034 while (its--) { 1035 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 1038 /* update rhs: bb1 = bb - B*x */ 1039 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1040 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1041 1042 /* local sweep */ 1043 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1044 CHKERRQ(ierr); 1045 } 1046 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1047 if (flag & SOR_ZERO_INITIAL_GUESS) { 1048 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1049 its--; 1050 } 1051 while (its--) { 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 1055 /* update rhs: bb1 = bb - B*x */ 1056 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1057 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1058 1059 /* local sweep */ 1060 ierr = 
(*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1061 CHKERRQ(ierr); 1062 } 1063 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1064 if (flag & SOR_ZERO_INITIAL_GUESS) { 1065 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1066 its--; 1067 } 1068 while (its--) { 1069 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 1072 /* update rhs: bb1 = bb - B*x */ 1073 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1074 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1075 1076 /* local sweep */ 1077 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1078 CHKERRQ(ierr); 1079 } 1080 } else { 1081 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1082 } 1083 1084 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1090 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1091 { 1092 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1093 Mat A = mat->A,B = mat->B; 1094 PetscErrorCode ierr; 1095 PetscReal isend[5],irecv[5]; 1096 1097 PetscFunctionBegin; 1098 info->block_size = 1.0; 1099 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1100 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1101 isend[3] = info->memory; isend[4] = info->mallocs; 1102 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1103 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1104 isend[3] += info->memory; isend[4] += info->mallocs; 1105 if (flag == MAT_LOCAL) { 1106 info->nz_used = isend[0]; 1107 info->nz_allocated = isend[1]; 1108 info->nz_unneeded = isend[2]; 1109 info->memory = isend[3]; 1110 info->mallocs = isend[4]; 1111 } else 
if (flag == MAT_GLOBAL_MAX) { 1112 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1113 info->nz_used = irecv[0]; 1114 info->nz_allocated = irecv[1]; 1115 info->nz_unneeded = irecv[2]; 1116 info->memory = irecv[3]; 1117 info->mallocs = irecv[4]; 1118 } else if (flag == MAT_GLOBAL_SUM) { 1119 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1120 info->nz_used = irecv[0]; 1121 info->nz_allocated = irecv[1]; 1122 info->nz_unneeded = irecv[2]; 1123 info->memory = irecv[3]; 1124 info->mallocs = irecv[4]; 1125 } 1126 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1127 info->fill_ratio_needed = 0; 1128 info->factor_mallocs = 0; 1129 info->rows_global = (double)matin->M; 1130 info->columns_global = (double)matin->N; 1131 info->rows_local = (double)matin->m; 1132 info->columns_local = (double)matin->N; 1133 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatSetOption_MPIAIJ" 1139 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1140 { 1141 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 switch (op) { 1146 case MAT_NO_NEW_NONZERO_LOCATIONS: 1147 case MAT_YES_NEW_NONZERO_LOCATIONS: 1148 case MAT_COLUMNS_UNSORTED: 1149 case MAT_COLUMNS_SORTED: 1150 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1151 case MAT_KEEP_ZEROED_ROWS: 1152 case MAT_NEW_NONZERO_LOCATION_ERR: 1153 case MAT_USE_INODES: 1154 case MAT_DO_NOT_USE_INODES: 1155 case MAT_IGNORE_ZERO_ENTRIES: 1156 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1157 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1158 break; 1159 case MAT_ROW_ORIENTED: 1160 a->roworiented = PETSC_TRUE; 1161 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1162 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1163 break; 1164 case MAT_ROWS_SORTED: 1165 case MAT_ROWS_UNSORTED: 1166 case MAT_YES_NEW_DIAGONALS: 1167 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1168 break; 1169 case 
MAT_COLUMN_ORIENTED: 1170 a->roworiented = PETSC_FALSE; 1171 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1172 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1173 break; 1174 case MAT_IGNORE_OFF_PROC_ENTRIES: 1175 a->donotstash = PETSC_TRUE; 1176 break; 1177 case MAT_NO_NEW_DIAGONALS: 1178 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1179 case MAT_SYMMETRIC: 1180 case MAT_STRUCTURALLY_SYMMETRIC: 1181 case MAT_HERMITIAN: 1182 case MAT_SYMMETRY_ETERNAL: 1183 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1184 break; 1185 case MAT_NOT_SYMMETRIC: 1186 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1187 case MAT_NOT_HERMITIAN: 1188 case MAT_NOT_SYMMETRY_ETERNAL: 1189 break; 1190 default: 1191 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1192 } 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatGetRow_MPIAIJ" 1198 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1199 { 1200 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1201 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1202 PetscErrorCode ierr; 1203 int i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1204 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1205 int *cmap,*idx_p; 1206 1207 PetscFunctionBegin; 1208 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1209 mat->getrowactive = PETSC_TRUE; 1210 1211 if (!mat->rowvalues && (idx || v)) { 1212 /* 1213 allocate enough space to hold information from the longest row. 
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    /* scan the CSR row-pointer arrays of both blocks for the widest row */
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one buffer holds both the values and (behind them) the indices */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request only the pieces (values/indices) that the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* Companion to MatGetRow_MPIAIJ: clears the "row active" flag. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes Frobenius, 1- (max column) or infinity- (max row) norm of an
   MPIAIJ matrix by combining the two local blocks and reducing over the
   communicator (continues below).
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then a global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      /* accumulate per-global-column absolute sums (length N on every process) */
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal block columns are compressed: garray maps to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are not split across processes, so a local max + global max suffices */
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose of an MPIAIJ matrix by inserting each local row as a
   column of the new matrix (MatSetValues with a single column).  In-place
   transposition (matout == PETSC_NULL) is only allowed for square matrices.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  int            M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* B has the row/column sizes of A swapped */
  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part; column indices are shifted to global numbering
     IN PLACE and restored after the insertion loop */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part; garray[] maps compressed columns to global numbering */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols; /* keep original pointer: cols is advanced in the loop below */
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: steal B's data into A's header */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   Computes mat = diag(ll)*mat*diag(rr).  The left scaling is applied to both
   local blocks; the right scaling of the off-diagonal block needs the
   scattered (ghosted) entries of rr (continues below).
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  int            s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
/* Prints the SeqAIJ options help, on the first process only. */
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/* AIJ matrices always have block size 1. */
PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* Marks the (diagonal-block) factorization as stale; B is never factored. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   Two MPIAIJ matrices are equal iff on every process both the diagonal and
   off-diagonal blocks compare equal; the local results are combined with a
   logical-AND reduction.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    /* only compare the off-diagonal blocks when the diagonal blocks matched */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/* Copies A into B, using the fast block-wise path only for identical patterns. */
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* Default preallocation hook: use the library defaults for both blocks. */
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = Y + a*X for MPIAIJ matrices, choosing a fast BLAS path when
   both matrices share the same nonzero pattern (continues below).
*/
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  int            i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: the nonzero arrays line up, so a plain BLAS axpy works */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a cached x->y index map that belongs to a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type; slot numbers follow _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
/*89*/ 0,
       MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_MPIAIJ_MPIAIJ,
/*94*/ MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Saves the numerical values of both blocks so they can be restored later. */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously saved with MatStoreValues(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Type-specific implementation behind MatMPIAIJSetPreallocation(): validates
   the requested per-row nonzero counts and forwards them to the two local
   SeqAIJ blocks (d_* for the diagonal block, o_* for the off-diagonal block).
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  int            i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  /* library defaults when the caller did not choose */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   Constructor for the MPIAIJ type: allocates the Mat_MPIAIJ context, installs
   the MatOps table, computes row/column ownership ranges, creates the two
   local SeqAIJ blocks (diagonal A and off-diagonal B) and registers the
   type-specific composed functions.
*/
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  int            i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2; /* cowners shares the rowners allocation */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the gathered local row counts into ownership offsets */
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_AIJ"
/* "aij" constructor: delegates to SeqAIJ or MPIAIJ depending on the communicator size. */
PetscErrorCode MatCreate_AIJ(Mat A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/* Duplicates an MPIAIJ matrix (definition continues beyond this view). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
mat->assembled = PETSC_TRUE; 1919 mat->insertmode = NOT_SET_VALUES; 1920 mat->preallocated = PETSC_TRUE; 1921 1922 a->rstart = oldmat->rstart; 1923 a->rend = oldmat->rend; 1924 a->cstart = oldmat->cstart; 1925 a->cend = oldmat->cend; 1926 a->size = oldmat->size; 1927 a->rank = oldmat->rank; 1928 a->donotstash = oldmat->donotstash; 1929 a->roworiented = oldmat->roworiented; 1930 a->rowindices = 0; 1931 a->rowvalues = 0; 1932 a->getrowactive = PETSC_FALSE; 1933 1934 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1935 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1936 if (oldmat->colmap) { 1937 #if defined (PETSC_USE_CTABLE) 1938 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1939 #else 1940 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1941 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1942 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1943 #endif 1944 } else a->colmap = 0; 1945 if (oldmat->garray) { 1946 int len; 1947 len = oldmat->B->n; 1948 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1949 PetscLogObjectMemory(mat,len*sizeof(int)); 1950 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1951 } else a->garray = 0; 1952 1953 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1954 PetscLogObjectParent(mat,a->lvec); 1955 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1956 PetscLogObjectParent(mat,a->Mvctx); 1957 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1958 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1959 PetscLogObjectParent(mat,a->A); 1960 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1961 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1962 PetscLogObjectParent(mat,a->B); 1963 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1964 *newmat = mat; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #include 
"petscsys.h" 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatLoad_MPIAIJ" 1972 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1973 { 1974 Mat A; 1975 PetscScalar *vals,*svals; 1976 MPI_Comm comm = ((PetscObject)viewer)->comm; 1977 MPI_Status status; 1978 PetscErrorCode ierr; 1979 int i,nz,j,rstart,rend,fd; 1980 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1981 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1982 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1983 1984 PetscFunctionBegin; 1985 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1986 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1987 if (!rank) { 1988 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1989 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1990 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1991 if (header[3] < 0) { 1992 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1993 } 1994 } 1995 1996 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1997 M = header[1]; N = header[2]; 1998 /* determine ownership of all rows */ 1999 m = M/size + ((M % size) > rank); 2000 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2001 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2002 rowners[0] = 0; 2003 for (i=2; i<=size; i++) { 2004 rowners[i] += rowners[i-1]; 2005 } 2006 rstart = rowners[rank]; 2007 rend = rowners[rank+1]; 2008 2009 /* distribute row lengths to all processors */ 2010 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 2011 offlens = ourlens + (rend-rstart); 2012 if (!rank) { 2013 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 2014 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2015 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2016 for (i=0; i<size; i++) 
sndcounts[i] = rowners[i+1] - rowners[i]; 2017 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2018 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2019 } else { 2020 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2021 } 2022 2023 if (!rank) { 2024 /* calculate the number of nonzeros on each processor */ 2025 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2026 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2027 for (i=0; i<size; i++) { 2028 for (j=rowners[i]; j< rowners[i+1]; j++) { 2029 procsnz[i] += rowlengths[j]; 2030 } 2031 } 2032 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2033 2034 /* determine max buffer needed and allocate it */ 2035 maxnz = 0; 2036 for (i=0; i<size; i++) { 2037 maxnz = PetscMax(maxnz,procsnz[i]); 2038 } 2039 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2040 2041 /* read in my part of the matrix column indices */ 2042 nz = procsnz[0]; 2043 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2044 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2045 2046 /* read in every one elses and ship off */ 2047 for (i=1; i<size; i++) { 2048 nz = procsnz[i]; 2049 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2050 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2051 } 2052 ierr = PetscFree(cols);CHKERRQ(ierr); 2053 } else { 2054 /* determine buffer space needed for message */ 2055 nz = 0; 2056 for (i=0; i<m; i++) { 2057 nz += ourlens[i]; 2058 } 2059 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2060 2061 /* receive message of column indices*/ 2062 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2063 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2064 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2065 } 2066 2067 /* determine column ownership if matrix is not square */ 2068 if (N != M) { 2069 
n = N/size + ((N % size) > rank); 2070 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2071 cstart = cend - n; 2072 } else { 2073 cstart = rstart; 2074 cend = rend; 2075 n = cend - cstart; 2076 } 2077 2078 /* loop over local rows, determining number of off diagonal entries */ 2079 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2080 jj = 0; 2081 for (i=0; i<m; i++) { 2082 for (j=0; j<ourlens[i]; j++) { 2083 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2084 jj++; 2085 } 2086 } 2087 2088 /* create our matrix */ 2089 for (i=0; i<m; i++) { 2090 ourlens[i] -= offlens[i]; 2091 } 2092 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2093 ierr = MatSetType(A,type);CHKERRQ(ierr); 2094 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2095 2096 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2097 for (i=0; i<m; i++) { 2098 ourlens[i] += offlens[i]; 2099 } 2100 2101 if (!rank) { 2102 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2103 2104 /* read in my part of the matrix numerical values */ 2105 nz = procsnz[0]; 2106 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2107 2108 /* insert into matrix */ 2109 jj = rstart; 2110 smycols = mycols; 2111 svals = vals; 2112 for (i=0; i<m; i++) { 2113 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2114 smycols += ourlens[i]; 2115 svals += ourlens[i]; 2116 jj++; 2117 } 2118 2119 /* read in other processors and ship out */ 2120 for (i=1; i<size; i++) { 2121 nz = procsnz[i]; 2122 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2123 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2124 } 2125 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2126 } else { 2127 /* receive numeric values */ 2128 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2129 2130 /* receive message of values*/ 2131 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 
2132 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2133 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2134 2135 /* insert into matrix */ 2136 jj = rstart; 2137 smycols = mycols; 2138 svals = vals; 2139 for (i=0; i<m; i++) { 2140 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2141 smycols += ourlens[i]; 2142 svals += ourlens[i]; 2143 jj++; 2144 } 2145 } 2146 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2147 ierr = PetscFree(vals);CHKERRQ(ierr); 2148 ierr = PetscFree(mycols);CHKERRQ(ierr); 2149 ierr = PetscFree(rowners);CHKERRQ(ierr); 2150 2151 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2152 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2153 *newmat = A; 2154 PetscFunctionReturn(0); 2155 } 2156 2157 #undef __FUNCT__ 2158 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2159 /* 2160 Not great since it makes two copies of the submatrix, first an SeqAIJ 2161 in local and then by concatenating the local matrices the end result. 
/*
   MatGetSubMatrix_MPIAIJ - Extracts the parallel submatrix mat(isrow,iscol).

   First gathers the requested rows/columns into a local SeqAIJ via
   MatGetSubMatrices(), then concatenates those local pieces into a new
   parallel matrix. csize gives the local column size of the result
   (PETSC_DECIDE lets this routine choose). With MAT_REUSE_MATRIX the local
   piece saved on a previous call (composed as "SubMatrix") is refilled and
   *newmat must be the previously returned matrix.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  int            i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* the local scratch submatrix was stashed on the output matrix last time */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive scan turns the local column counts into this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* last rank sees the global total; sanity-check it against n */
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths: olens shares the allocation with dlens */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* walk the local CSR arrays of Mreuse row by row and insert into M */
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* composing took a reference; drop ours so M owns Mreuse */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to the this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, ans [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzerors per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc3. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2303 submatrix (same value is used for all local rows). 2304 - o_nnz - array containing the number of nonzeros in the various rows of the 2305 OFF-DIAGONAL portion of the local submatrix (possibly different for 2306 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2307 structure. The size of this array is equal to the number 2308 of local rows, i.e 'm'. 2309 2310 The AIJ format (also called the Yale sparse matrix format or 2311 compressed row storage), is fully compatible with standard Fortran 77 2312 storage. That is, the stored row and column indices can begin at 2313 either one (as in Fortran) or zero. See the users manual for details. 2314 2315 The user MUST specify either the local or global matrix dimensions 2316 (possibly both). 2317 2318 The parallel matrix is partitioned such that the first m0 rows belong to 2319 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2320 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2321 2322 The DIAGONAL portion of the local submatrix of a processor can be defined 2323 as the submatrix which is obtained by extraction the part corresponding 2324 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2325 first row that belongs to the processor, and r2 is the last row belonging 2326 to the this processor. This is a square mxm matrix. The remaining portion 2327 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2328 2329 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2330 2331 By default, this format uses inodes (identical nodes) when possible. 2332 We search for consecutive rows with the same nonzero structure, thereby 2333 reusing matrix information to achieve increased efficiency. 2334 2335 Options Database Keys: 2336 + -mat_aij_no_inode - Do not use inodes 2337 . 
-mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2338 - -mat_aij_oneindex - Internally use indexing starting at 1 2339 rather than 0. Note that when calling MatSetValues(), 2340 the user still MUST index entries starting at 0! 2341 2342 Example usage: 2343 2344 Consider the following 8x8 matrix with 34 non-zero values, that is 2345 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2346 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2347 as follows: 2348 2349 .vb 2350 1 2 0 | 0 3 0 | 0 4 2351 Proc0 0 5 6 | 7 0 0 | 8 0 2352 9 0 10 | 11 0 0 | 12 0 2353 ------------------------------------- 2354 13 0 14 | 15 16 17 | 0 0 2355 Proc1 0 18 0 | 19 20 21 | 0 0 2356 0 0 0 | 22 23 0 | 24 0 2357 ------------------------------------- 2358 Proc2 25 26 27 | 0 0 28 | 29 0 2359 30 0 0 | 31 32 33 | 0 34 2360 .ve 2361 2362 This can be represented as a collection of submatrices as: 2363 2364 .vb 2365 A B C 2366 D E F 2367 G H I 2368 .ve 2369 2370 Where the submatrices A,B,C are owned by proc0, D,E,F are 2371 owned by proc1, G,H,I are owned by proc2. 2372 2373 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2374 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2375 The 'M','N' parameters are 8,8, and have the same values on all procs. 2376 2377 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2378 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2379 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2380 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2381 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2382 matrix, ans [DF] as another SeqAIJ matrix. 2383 2384 When d_nz, o_nz parameters are specified, d_nz storage elements are 2385 allocated for every row of the local diagonal submatrix, and o_nz 2386 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
2387 One way to choose d_nz and o_nz is to use the max nonzerors per local 2388 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2389 In this case, the values of d_nz,o_nz are: 2390 .vb 2391 proc0 : dnz = 2, o_nz = 2 2392 proc1 : dnz = 3, o_nz = 2 2393 proc2 : dnz = 1, o_nz = 4 2394 .ve 2395 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2396 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2397 for proc3. i.e we are using 12+15+10=37 storage locations to store 2398 34 values. 2399 2400 When d_nnz, o_nnz parameters are specified, the storage is specified 2401 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2402 In the above case the values for d_nnz,o_nnz are: 2403 .vb 2404 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2405 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2406 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2407 .ve 2408 Here the space allocated is sum of all the above values i.e 34, and 2409 hence pre-allocation is perfect. 2410 2411 Level: intermediate 2412 2413 .keywords: matrix, aij, compressed row, sparse, parallel 2414 2415 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2416 @*/ 2417 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2418 { 2419 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]); 2420 2421 PetscFunctionBegin; 2422 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2423 if (f) { 2424 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2425 } 2426 PetscFunctionReturn(0); 2427 } 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatCreateMPIAIJ" 2431 /*@C 2432 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2433 (the default parallel PETSc format). For good matrix assembly performance 2434 the user should preallocate the matrix storage by setting the parameters 2435 d_nz (or d_nnz) and o_nz (or o_nnz). 
/*
   MatCreateMPIAIJ - Convenience wrapper documented in the man page above:
   creates the matrix, picks SEQAIJ or MPIAIJ based on the communicator size,
   and applies the matching preallocation.
*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single-process communicator: a SEQAIJ matrix is returned instead
       (see the Notes in the man page above); o_nz/o_nnz are not used */
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   MatMPIAIJGetSeqAIJ - Returns borrowed references to the two local SeqAIJ
   blocks of an MPIAIJ matrix (Ad = diagonal block, Ao = off-diagonal block)
   and the garray table mapping Ao's compacted local columns to global columns.
   No new references are taken; callers must not destroy the returned objects.
*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Applies an ISColoring to both local blocks.

   IS_COLORING_LOCAL: colors are indexed by global column, so the off-diagonal
   colors are gathered from all processes and picked out through garray.
   IS_COLORING_GHOSTED: colors are indexed by local (ghosted) numbering, so
   global indices are translated through A->mapping first.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  int            i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* NOTE(review): MPI_COMM_SELF here vs PETSC_COMM_SELF in the GHOSTED
       branch below - looks inconsistent; confirm whether this is intentional */
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: translate this rank's global column
       range to local (ghosted) indices, then look up the colors */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global
       column indices of the off-diagonal block */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/*
   MatSetValuesAdic_MPIAIJ - Forwards ADIC-computed values to both local
   sequential blocks. advalues is an opaque pointer interpreted by the
   SeqAIJ implementation.
*/
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/*
   MatSetValuesAdifor_MPIAIJ - Forwards ADIFOR-computed values (nl entries per
   nonzero) to both local sequential blocks.
*/
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatinating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
2729 2730 @*/ 2731 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat) 2732 { 2733 PetscErrorCode ierr; 2734 int m,n,i,rstart,nnz,I,*dnz,*onz; 2735 const int *indx; 2736 const PetscScalar *values; 2737 PetscMap columnmap,rowmap; 2738 2739 PetscFunctionBegin; 2740 2741 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2742 if (scall == MAT_INITIAL_MATRIX){ 2743 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2744 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2745 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2746 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2747 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2748 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2749 2750 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2751 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2752 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2753 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2754 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2755 2756 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2757 for (i=0;i<m;i++) { 2758 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2759 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2760 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2761 } 2762 /* This routine will ONLY return MPIAIJ type matrix */ 2763 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2764 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2765 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2766 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2767 2768 } else if (scall == MAT_REUSE_MATRIX){ 2769 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2770 } else { 2771 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2772 } 2773 2774 for (i=0;i<m;i++) { 2775 ierr = 
MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2776 I = i + rstart; 2777 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2778 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2779 } 2780 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2781 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatFileSplit" 2789 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2790 { 2791 PetscErrorCode ierr; 2792 int rank,m,N,i,rstart,nnz; 2793 size_t len; 2794 const int *indx; 2795 PetscViewer out; 2796 char *name; 2797 Mat B; 2798 const PetscScalar *values; 2799 2800 PetscFunctionBegin; 2801 2802 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2803 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2804 /* Should this be the type of the diagonal block of A? */ 2805 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2806 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2807 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2808 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2809 for (i=0;i<m;i++) { 2810 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2811 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2812 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2813 } 2814 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2816 2817 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2818 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2819 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2820 sprintf(name,"%s.%d",outfile,rank); 2821 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2822 ierr = PetscFree(name); 2823 ierr = MatView(B,out);CHKERRQ(ierr); 2824 ierr = 
PetscViewerDestroy(out);CHKERRQ(ierr); 2825 ierr = MatDestroy(B);CHKERRQ(ierr); 2826 PetscFunctionReturn(0); 2827 } 2828 2829 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */ 2830 PetscMap rowmap; 2831 int nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r; 2832 int *len_sra; /* array of length 2*size, len_sra[i]/len_sra[size+i] 2833 store length of i-th send/recv matrix values */ 2834 MatScalar *abuf_s; 2835 } Mat_Merge_SeqsToMPI; 2836 2837 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2838 #undef __FUNCT__ 2839 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2840 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2841 { 2842 PetscErrorCode ierr; 2843 Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr; 2844 2845 PetscFunctionBegin; 2846 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2847 ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr); 2848 ierr = PetscFree(merge->len_sra);CHKERRQ(ierr); 2849 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2850 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2851 ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr); 2852 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2853 ierr = PetscFree(merge);CHKERRQ(ierr); 2854 2855 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 #include "src/mat/utils/freespace.h" 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatMerge_SeqsToMPI" 2862 /*@C 2863 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2864 matrices from each processor 2865 2866 Collective on MPI_Comm 2867 2868 Input Parameters: 2869 + comm - the communicators the parallel matrix will live on 2870 . seqmat - the input sequential matrices 2871 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2872 2873 Output Parameter: 2874 . mpimat - the parallel matrix generated 2875 2876 Level: advanced 2877 2878 Notes: The dimensions of the sequential matrix in each processor 2879 MUST be the same. 
2880 2881 @*/ 2882 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,MatReuse scall,Mat *mpimat) 2883 { 2884 PetscErrorCode ierr; 2885 Mat B_seq,B_mpi; 2886 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2887 int size,rank,M=seqmat->m,N=seqmat->n,m,i,j,*owners, 2888 *ai=a->i,*aj=a->j,tag,taga,len,len_a,*len_s,*len_sa,proc,*len_r,*len_ra, 2889 **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i, 2890 *bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj; 2891 MPI_Request *s_waits,*r_waits,*s_waitsa,*r_waitsa; 2892 MPI_Status *status; 2893 MatScalar *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i; 2894 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2895 Mat_Merge_SeqsToMPI *merge; 2896 2897 PetscFunctionBegin; 2898 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2899 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2900 if (scall == MAT_INITIAL_MATRIX){ 2901 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 2902 } else if (scall == MAT_REUSE_MATRIX){ 2903 B_mpi = *mpimat; 2904 merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr; 2905 bi = merge->bi; 2906 bj = merge->bj; 2907 ijbuf_r = merge->ijbuf_r; 2908 } else { 2909 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2910 } 2911 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2912 2913 /* determine the number of messages to send, their lengths */ 2914 /*---------------------------------------------------------*/ 2915 if (scall == MAT_INITIAL_MATRIX){ 2916 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 2917 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 2918 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 2919 ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr); 2920 ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr); 2921 } 2922 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2923 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2924 2925 len_sa = merge->len_sra; 2926 len_ra = merge->len_sra + size; 2927 2928 if 
(scall == MAT_INITIAL_MATRIX){ 2929 merge->nsend = 0; 2930 len = len_a = 0; 2931 for (proc=0; proc<size; proc++){ 2932 if (proc == rank) continue; 2933 len_s[proc] = len_sa[proc] = 0; 2934 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2935 len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */ 2936 } 2937 if (len_sa[proc]) { 2938 merge->nsend++; 2939 len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc]; 2940 if (len < len_s[proc]) len = len_s[proc]; 2941 if (len_a < len_sa[proc]) len_a = len_sa[proc]; 2942 } 2943 } 2944 2945 /* determine the number and length of messages to receive */ 2946 /*--------------------------------------------------------*/ 2947 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 2948 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr); 2949 2950 /* post the Irecvs corresponding to these messages */ 2951 /*-------------------------------------------------*/ 2952 /* get new tag to keep the communication clean */ 2953 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr); 2954 ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr); 2955 2956 /* post the sends */ 2957 /*----------------*/ 2958 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2959 ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr); 2960 k = 0; 2961 for (proc=0; proc<size; proc++){ 2962 if (!len_s[proc]) continue; 2963 /* form outgoing ij data to be sent to [proc] */ 2964 nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc]; 2965 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2966 anzi = ai[i+1] - ai[i]; 2967 if (i==owners[proc]){ 2968 ijbuf_s[i-owners[proc]] = anzi; 2969 } else { 2970 ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi; 2971 } 2972 for (j=0; j <anzi; j++){ 2973 *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; 
2974 } 2975 } 2976 ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr); 2977 k++; 2978 } 2979 2980 /* receives and sends of ij-structure are complete, deallocate memories */ 2981 /*----------------------------------------------------------------------*/ 2982 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2983 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2984 2985 ierr = PetscFree(ijbuf_s);CHKERRQ(ierr); 2986 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2987 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2988 } 2989 2990 /* send and recv matrix values */ 2991 /*-----------------------------*/ 2992 if (scall == MAT_INITIAL_MATRIX){ 2993 ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr); 2994 for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */ 2995 } 2996 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2997 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr); 2998 2999 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr); 3000 k = 0; 3001 for (proc=0; proc<size; proc++){ 3002 if (!len_sa[proc]) continue; 3003 /* form outgoing mat value data to be sent to [proc] */ 3004 abuf_i = merge->abuf_s; 3005 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 3006 anzi = ai[i+1] - ai[i]; 3007 for (j=0; j <anzi; j++){ 3008 *abuf_i = *(aa+ai[i]+j); abuf_i++; 3009 } 3010 } 3011 ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr); 3012 k++; 3013 } 3014 3015 ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr); 3016 ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr); 3017 ierr = PetscFree(status);CHKERRQ(ierr); 3018 3019 ierr = PetscFree(s_waitsa);CHKERRQ(ierr); 3020 ierr = PetscFree(r_waitsa);CHKERRQ(ierr); 3021 3022 /* create seq matrix B_seq in each 
processor */ 3023 /*-------------------------------------------*/ 3024 if (scall == MAT_INITIAL_MATRIX){ 3025 ierr = PetscFree(len_s);CHKERRQ(ierr); 3026 /* allocate bi array and free space for accumulating nonzero column info */ 3027 ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr); 3028 bi[0] = 0; 3029 3030 ierr = PetscMalloc((N+1)*sizeof(int),&lnk);CHKERRQ(ierr); 3031 ierr = PetscLLInitialize(N,N);CHKERRQ(ierr); 3032 3033 /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */ 3034 len = 0; 3035 for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i]; 3036 ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr); 3037 current_space = free_space; 3038 3039 /* determine symbolic info for each row of B_seq */ 3040 for (i=0;i<m;i++) { 3041 bnzi = 0; 3042 /* add local non-zero cols of this proc's seqmat into lnk */ 3043 arow = owners[rank] + i; 3044 anzi = ai[arow+1] - ai[arow]; 3045 aj = a->j + ai[arow]; 3046 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr); 3047 bnzi += nlnk; 3048 /* add received col data into lnk */ 3049 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3050 /* i-th row */ 3051 if (i == 0){ 3052 anzi = *(ijbuf_r[k]+i); 3053 aj = ijbuf_r[k] + m; 3054 } else { 3055 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3056 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3057 } 3058 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr); 3059 bnzi += nlnk; 3060 } 3061 3062 /* if free space is not available, make more free space */ 3063 if (current_space->local_remaining<bnzi) { 3064 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3065 nspacedouble++; 3066 } 3067 /* copy data into free space, then initialize lnk */ 3068 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array);CHKERRQ(ierr); 3069 current_space->array += bnzi; 3070 current_space->local_used += bnzi; 3071 current_space->local_remaining -= bnzi; 3072 3073 bi[i+1] = bi[i] + bnzi; 3074 } 3075 ierr = 
PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr); 3076 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3077 3078 ierr = PetscFree(lnk);CHKERRQ(ierr); 3079 ierr = PetscFree(len_r);CHKERRQ(ierr); 3080 3081 /* allocate space for ba */ 3082 ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3083 ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3084 3085 /* put together B_seq */ 3086 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr); 3087 ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */ 3088 3089 /* create symbolic parallel matrix B_mpi by concatinating B_seq */ 3090 /*--------------------------------------------------------------*/ 3091 ierr = MatMerge(comm,B_seq,scall,&B_mpi);CHKERRQ(ierr); 3092 } /* endof if (scall == MAT_INITIAL_MATRIX) */ 3093 3094 /* insert mat values of B_mpi */ 3095 /*----------------------------*/ 3096 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3097 3098 /* set values of ba */ 3099 for (i=0; i<m; i++) { 3100 arow = owners[rank] + i; 3101 bj_i = bj+bi[i]; /* col indices of the i-th row of B_seq */ 3102 bnzi = bi[i+1] - bi[i]; 3103 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3104 3105 /* add local non-zero vals of this proc's seqmat into ba */ 3106 anzi = ai[arow+1] - ai[arow]; 3107 aj = a->j + ai[arow]; 3108 aa = a->a + ai[arow]; 3109 nextaj = 0; 3110 for (j=0; nextaj<anzi; j++){ 3111 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3112 ba_i[j] += aa[nextaj++]; 3113 } 3114 } 3115 3116 /* add received vals into ba */ 3117 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3118 /* i-th row */ 3119 if (i == 0){ 3120 anzi = *(ijbuf_r[k]+i); 3121 aj = ijbuf_r[k] + m; 3122 aa = abuf_r[k]; 3123 } else { 3124 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3125 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3126 aa = abuf_r[k] + *(ijbuf_r[k]+i-1); 3127 } 3128 nextaj = 0; 3129 for (j=0; nextaj<anzi; j++){ 3130 if 
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3131 ba_i[j] += aa[nextaj++]; 3132 } 3133 } 3134 } 3135 3136 ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3137 } 3138 ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3139 ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3140 3141 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3142 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3143 3144 /* attach the supporting struct to B_mpi for reuse */ 3145 /*-------------------------------------------------*/ 3146 if (scall == MAT_INITIAL_MATRIX){ 3147 B_mpi->spptr = (void*)merge; 3148 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3149 merge->bi = bi; 3150 merge->bj = bj; 3151 merge->ijbuf_r = ijbuf_r; 3152 3153 *mpimat = B_mpi; 3154 } 3155 PetscFunctionReturn(0); 3156 } 3157