1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 324 PetscFunctionBegin; 325 if (!aij->donotstash) { 326 while (1) { 327 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 328 if (!flg) break; 329 330 for (i=0; i<n;) { 331 /* Now identify the consecutive vals belonging to the same row */ 332 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 333 if (j < n) ncols = j-i; 334 else ncols = n-i; 335 /* Now assemble all these values with a single function call */ 336 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 337 i = j; 338 } 339 } 340 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 341 } 342 a->compressedrow.use = PETSC_FALSE; 343 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 345 346 /* determine if any processor has disassembled, if so we must 347 also disassemble ourselfs, in order that we may reassemble. */ 348 /* 349 if nonzero structure of submatrix B cannot change then we know that 350 no processor disassembled thus we can skip this stuff 351 */ 352 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 353 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 354 if (mat->was_assembled && !other_disassembled) { 355 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 356 } 357 } 358 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 359 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 360 } 361 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 362 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 363 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 364 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 365 366 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 367 aij->rowvalues = 0; 368 369 /* used by MatAXPY() */ 370 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 371 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 372 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 379 { 380 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 381 PetscErrorCode ierr; 382 383 PetscFunctionBegin; 384 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 385 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatZeroRows_MPIAIJ" 391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 392 { 393 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 394 PetscErrorCode ierr; 395 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 396 PetscInt i,*owners = A->rmap.range; 397 PetscInt *nprocs,j,idx,nsends,row; 398 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 762 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 763 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 764 /* get largest number of rows any processor has */ 765 rlen = mat->rmap.n; 766 range = mat->rmap.range; 767 for (i=1; i<size; i++) { 768 rlen = PetscMax(rlen,range[i+1] - range[i]); 769 } 770 } else { 771 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 772 rlen = mat->rmap.n; 773 } 774 775 /* load up the local row counts */ 776 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 777 for (i=0; i<mat->rmap.n; i++) { 778 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 779 } 780 781 /* store the row lengths to the file */ 782 if (!rank) { 783 MPI_Status status; 784 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 785 for (i=1; i<size; i++) { 786 rlen = range[i+1] - range[i]; 787 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 788 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 789 } 790 } else { 791 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 792 } 793 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 794 795 /* load up the local column indices */ 796 nzmax = nz; /* )th processor needs space a largest processor needs */ 797 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 798 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 799 cnt = 0; 800 for (i=0; i<mat->rmap.n; i++) { 801 for (j=B->i[i]; j<B->i[i+1]; j++) { 802 if ( (col = garray[B->j[j]]) > cstart) break; 803 column_indices[cnt++] = col; 804 } 805 for (k=A->i[i]; k<A->i[i+1]; k++) { 806 column_indices[cnt++] = A->j[k] + cstart; 807 } 808 for (; j<B->i[i+1]; j++) { 809 column_indices[cnt++] = garray[B->j[j]]; 810 } 811 } 812 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 813 814 /* store the column indices to the file */ 815 if (!rank) { 816 MPI_Status status; 817 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 818 for (i=1; i<size; i++) { 819 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 820 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 821 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 823 } 824 } else { 825 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 826 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 } 828 ierr = PetscFree(column_indices);CHKERRQ(ierr); 829 830 /* load up the local column values */ 831 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 832 cnt = 0; 833 for (i=0; i<mat->rmap.n; i++) { 834 for (j=B->i[i]; j<B->i[i+1]; j++) { 835 if ( garray[B->j[j]] > cstart) break; 836 column_values[cnt++] = B->a[j]; 837 } 838 for (k=A->i[i]; k<A->i[i+1]; k++) { 839 column_values[cnt++] = A->a[k]; 840 } 841 for (; j<B->i[i+1]; j++) { 842 column_values[cnt++] = B->a[j]; 843 } 844 } 845 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 846 847 /* store the column values to the file */ 848 if (!rank) { 849 MPI_Status status; 850 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 851 for (i=1; i<size; i++) { 852 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 854 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 } else { 858 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(column_values);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 868 { 869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 870 PetscErrorCode ierr; 871 PetscMPIInt rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,iascii,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (iascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 PetscTruth inodes; 885 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 889 if (!inodes) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 891 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 894 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscInt inodecount,inodelimit,*inodes; 905 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 906 if (inodes) { 907 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 908 } else { 909 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 910 } 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 913 PetscFunctionReturn(0); 914 } 915 } else if (isbinary) { 916 if (size == 1) { 917 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 921 } 922 PetscFunctionReturn(0); 923 } else if (isdraw) { 924 PetscDraw draw; 925 PetscTruth isnull; 926 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 927 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 928 } 929 930 if (size == 1) { 931 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 932 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 933 } else { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqAIJ *Aloc; 937 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 938 PetscScalar *a; 939 940 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 941 if (!rank) { 942 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 943 } else { 944 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = mat->rmap.rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = mat->rmap.rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NO_NEW_NONZERO_LOCATIONS: 1207 case MAT_YES_NEW_NONZERO_LOCATIONS: 1208 case MAT_COLUMNS_UNSORTED: 1209 case MAT_COLUMNS_SORTED: 1210 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1211 case MAT_KEEP_ZEROED_ROWS: 1212 case MAT_NEW_NONZERO_LOCATION_ERR: 1213 case MAT_USE_INODES: 1214 case MAT_DO_NOT_USE_INODES: 1215 case MAT_IGNORE_ZERO_ENTRIES: 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1218 break; 1219 case MAT_ROW_ORIENTED: 1220 a->roworiented = PETSC_TRUE; 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROWS_SORTED: 1225 case MAT_ROWS_UNSORTED: 1226 case MAT_YES_NEW_DIAGONALS: 1227 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1228 break; 1229 case MAT_COLUMN_ORIENTED: 1230 a->roworiented = PETSC_FALSE; 1231 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1232 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1233 break; 1234 case MAT_IGNORE_OFF_PROC_ENTRIES: 1235 a->donotstash = PETSC_TRUE; 1236 break; 1237 case MAT_NO_NEW_DIAGONALS: 1238 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1239 case MAT_SYMMETRIC: 1240 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1241 break; 1242 case MAT_STRUCTURALLY_SYMMETRIC: 1243 case MAT_HERMITIAN: 1244 case MAT_SYMMETRY_ETERNAL: 1245 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_NOT_SYMMETRIC: 1248 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1249 case MAT_NOT_HERMITIAN: 1250 case MAT_NOT_SYMMETRY_ETERNAL: 1251 break; 1252 default: 1253 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIAIJ" 1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1266 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1267 PetscInt *cmap,*idx_p; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 1275 allocate enough space to hold information from the longest row. 1276 */ 1277 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1278 PetscInt max = 1,tmp; 1279 for (i=0; i<matin->rmap.n; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1285 } 1286 1287 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1288 lrow = row - rstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]]; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!aij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1348 } 1349 aij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatNorm_MPIAIJ" 1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1356 { 1357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1358 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1359 PetscErrorCode ierr; 1360 PetscInt i,j,cstart = mat->cmap.rstart; 1361 PetscReal sum = 0.0; 1362 PetscScalar *v; 1363 1364 PetscFunctionBegin; 1365 if (aij->size == 1) { 1366 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1367 } else { 1368 if (type == NORM_FROBENIUS) { 1369 v = amat->a; 1370 for (i=0; i<amat->nz; i++) { 1371 #if defined(PETSC_USE_COMPLEX) 1372 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1373 #else 1374 sum += (*v)*(*v); v++; 1375 #endif 1376 } 1377 v = bmat->a; 1378 for (i=0; i<bmat->nz; i++) { 1379 #if defined(PETSC_USE_COMPLEX) 1380 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1381 #else 1382 sum += (*v)*(*v); v++; 1383 #endif 1384 } 1385 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1386 *norm = sqrt(*norm); 1387 } else if (type == NORM_1) { /* max column norm */ 1388 PetscReal *tmp,*tmp2; 1389 PetscInt *jj,*garray = aij->garray; 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1392 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1393 *norm = 0.0; 1394 v = amat->a; jj = amat->j; 1395 for (j=0; j<amat->nz; j++) { 1396 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1397 } 1398 v = bmat->a; jj = bmat->j; 1399 for (j=0; j<bmat->nz; j++) { 1400 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1401 } 1402 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1403 for (j=0; j<mat->cmap.N; j++) { 1404 if (tmp2[j] > *norm) *norm = tmp2[j]; 1405 } 1406 ierr = PetscFree(tmp);CHKERRQ(ierr); 1407 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1408 } else if (type == NORM_INFINITY) { /* max row norm */ 1409 PetscReal ntemp = 0.0; 1410 for (j=0; j<aij->A->rmap.n; j++) { 1411 v = amat->a + amat->i[j]; 1412 sum = 0.0; 1413 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1414 sum += PetscAbsScalar(*v); v++; 1415 } 1416 v = bmat->a + bmat->i[j]; 1417 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1418 sum += PetscAbsScalar(*v); v++; 1419 } 1420 if (sum > ntemp) ntemp = sum; 1421 } 1422 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1423 } else { 1424 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1425 } 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatTranspose_MPIAIJ" 1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1433 { 1434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1435 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1436 PetscErrorCode ierr; 1437 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1438 Mat B; 1439 PetscScalar *array; 1440 1441 PetscFunctionBegin; 1442 if (!matout && M != N) { 1443 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1444 } 1445 1446 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1447 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1448 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1449 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1450 1451 /* copy over the A part */ 1452 Aloc = (Mat_SeqAIJ*)a->A->data; 1453 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1454 row = A->rmap.rstart; 1455 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1456 for (i=0; i<m; i++) { 1457 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1458 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1459 } 1460 aj = Aloc->j; 1461 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1462 1463 /* copy over the B part */ 1464 Aloc = (Mat_SeqAIJ*)a->B->data; 1465 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1466 row = A->rmap.rstart; 1467 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1468 ct = cols; 1469 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1470 for (i=0; i<m; i++) { 1471 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1472 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1473 } 1474 ierr = PetscFree(ct);CHKERRQ(ierr); 1475 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1476 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 if (matout) { 1478 *matout = B; 1479 } else { 1480 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1488 { 1489 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1490 Mat a = aij->A,b = aij->B; 1491 PetscErrorCode ierr; 1492 PetscInt s1,s2,s3; 1493 1494 PetscFunctionBegin; 1495 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1496 if (rr) { 1497 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1498 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1499 /* Overlap communication with computation. */ 1500 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1501 } 1502 if (ll) { 1503 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1504 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1505 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1506 } 1507 /* scale the diagonal block */ 1508 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1509 1510 if (rr) { 1511 /* Do a scatter end and then right scale the off-diagonal block */ 1512 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1513 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1514 } 1515 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1521 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1522 { 1523 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1524 PetscErrorCode ierr; 1525 1526 PetscFunctionBegin; 1527 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1528 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1533 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1534 { 1535 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1536 PetscErrorCode ierr; 1537 1538 PetscFunctionBegin; 1539 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatEqual_MPIAIJ" 1545 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1546 { 1547 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1548 Mat a,b,c,d; 1549 PetscTruth flg; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 a = matA->A; b = matA->B; 1554 c = matB->A; d = matB->B; 1555 1556 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1557 if (flg) { 1558 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1559 } 1560 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatCopy_MPIAIJ" 1566 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1567 { 1568 PetscErrorCode ierr; 1569 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1570 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1571 1572 PetscFunctionBegin; 1573 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1574 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1575 /* because of the column compression in the off-processor part of the matrix a->B, 1576 the number of columns in a->B and b->B may be different, hence we cannot call 1577 the MatCopy() directly on the two parts. If need be, we can provide a more 1578 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1579 then copying the submatrices */ 1580 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1581 } else { 1582 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1583 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1590 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1591 { 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #include "petscblaslapack.h" 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatAXPY_MPIAIJ" 1602 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1603 { 1604 PetscErrorCode ierr; 1605 PetscInt i; 1606 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1607 PetscBLASInt bnz,one=1; 1608 Mat_SeqAIJ *x,*y; 1609 1610 PetscFunctionBegin; 1611 if (str == SAME_NONZERO_PATTERN) { 1612 PetscScalar alpha = a; 1613 x = (Mat_SeqAIJ *)xx->A->data; 1614 y = (Mat_SeqAIJ *)yy->A->data; 1615 bnz = (PetscBLASInt)x->nz; 1616 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1617 x = (Mat_SeqAIJ *)xx->B->data; 1618 y = (Mat_SeqAIJ *)yy->B->data; 1619 bnz = (PetscBLASInt)x->nz; 1620 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1621 } else if (str == SUBSET_NONZERO_PATTERN) { 1622 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1623 1624 x = (Mat_SeqAIJ *)xx->B->data; 1625 y = (Mat_SeqAIJ *)yy->B->data; 1626 if (y->xtoy && y->XtoY != xx->B) { 1627 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1628 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1629 } 1630 if (!y->xtoy) { /* get xtoy */ 1631 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1632 y->XtoY = xx->B; 1633 } 1634 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1635 } else { 1636 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatConjugate_MPIAIJ" 1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1646 { 1647 #if defined(PETSC_USE_COMPLEX) 1648 PetscErrorCode ierr; 1649 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1650 1651 PetscFunctionBegin; 1652 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1653 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1654 #else 1655 PetscFunctionBegin; 1656 #endif 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatRealPart_MPIAIJ" 1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1663 { 1664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1665 PetscErrorCode ierr; 1666 1667 PetscFunctionBegin; 1668 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1669 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1676 { 1677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1682 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #ifdef PETSC_HAVE_PBGL 1687 #include <boost/parallel/mpi/bsp_process_group.hpp> 1688 typedef boost::parallel::mpi::bsp_process_group process_group_type; 1689 1690 #include <boost/graph/distributed/adjacency_list.hpp> 1691 #include <boost/parallel/mpi/bsp_process_group.hpp> 1692 1693 #include <boost/graph/distributed/petsc/interface.hpp> 1694 #include <boost/graph/distributed/ilu_0.hpp> 1695 1696 namespace petsc = boost::distributed::petsc; 1697 using namespace std; 1698 typedef double value_type; 1699 typedef boost::graph::distributed::ilu_elimination_state elimination_state; 1700 typedef boost::adjacency_list<boost::listS, 1701 boost::distributedS<process_group_type, boost::vecS>, 1702 boost::bidirectionalS, 1703 // Vertex properties 1704 boost::no_property, 1705 // Edge properties 1706 boost::property<boost::edge_weight_t, value_type, 1707 boost::property<boost::edge_finished_t, elimination_state> > > graph_type; 1708 1709 typedef boost::graph_traits<graph_type>::vertex_descriptor vertex_type; 1710 typedef boost::graph_traits<graph_type>::edge_descriptor edge_type; 1711 typedef boost::property_map<graph_type, boost::edge_weight_t>::type weight_map_type; 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1715 /* 1716 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1717 */ 1718 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1719 { 1720 PetscTruth row_identity, col_identity; 1721 PetscObjectContainer c; 1722 PetscInt m, n, M, N; 1723 PetscErrorCode ierr; 1724 1725 PetscFunctionBegin; 1726 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1727 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1728 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1729 if (!row_identity || !col_identity) { 1730 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1731 } 1732 1733 process_group_type pg; 1734 graph_type* graph_p = new graph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1735 graph_type& graph = *graph_p; 1736 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1737 1738 //write_graphviz("petsc_matrix_as_graph.dot", graph, default_writer(), matrix_graph_writer<graph_type>(graph)); 1739 boost::property_map<graph_type, boost::edge_finished_t>::type finished = get(boost::edge_finished, graph); 1740 BGL_FORALL_EDGES(e, graph, graph_type) 1741 put(finished, e, boost::graph::distributed::unseen); 1742 1743 ilu_0(graph, get(boost::edge_weight, graph), get(boost::edge_finished, graph)); 1744 1745 /* put together the new matrix */ 1746 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1747 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1748 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1749 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1750 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1751 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1752 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1753 (*fact)->factor = FACTOR_LU; 1754 1755 ierr = PetscObjectContainerCreate(A->comm, &c); 1756 ierr = PetscObjectContainerSetPointer(c, graph_p); 1757 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1763 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1764 { 1765 PetscFunctionBegin; 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatSolve_MPIAIJ" 1771 /* 1772 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1773 */ 1774 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1775 { 1776 graph_type* graph_p; 1777 PetscObjectContainer c; 1778 PetscErrorCode ierr; 1779 1780 PetscFunctionBegin; 1781 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1782 ierr = PetscObjectContainerGetPointer(c, (void **) &graph_p);CHKERRQ(ierr); 1783 ierr = VecCopy(b, x); CHKERRQ(ierr); 1784 PetscFunctionReturn(0); 1785 } 1786 #endif 1787 1788 /* -------------------------------------------------------------------*/ 1789 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1790 MatGetRow_MPIAIJ, 1791 MatRestoreRow_MPIAIJ, 1792 MatMult_MPIAIJ, 1793 /* 4*/ MatMultAdd_MPIAIJ, 1794 MatMultTranspose_MPIAIJ, 1795 MatMultTransposeAdd_MPIAIJ, 1796 #ifdef PETSC_HAVE_PBGL 1797 MatSolve_MPIAIJ, 1798 #else 1799 0, 1800 #endif 1801 0, 1802 0, 1803 /*10*/ 0, 1804 0, 1805 0, 1806 MatRelax_MPIAIJ, 1807 MatTranspose_MPIAIJ, 1808 /*15*/ MatGetInfo_MPIAIJ, 1809 MatEqual_MPIAIJ, 1810 MatGetDiagonal_MPIAIJ, 1811 MatDiagonalScale_MPIAIJ, 1812 MatNorm_MPIAIJ, 1813 /*20*/ MatAssemblyBegin_MPIAIJ, 1814 MatAssemblyEnd_MPIAIJ, 1815 0, 1816 MatSetOption_MPIAIJ, 1817 MatZeroEntries_MPIAIJ, 1818 /*25*/ MatZeroRows_MPIAIJ, 1819 0, 1820 #ifdef PETSC_HAVE_PBGL 1821 MatLUFactorNumeric_MPIAIJ, 1822 #else 1823 0, 1824 #endif 1825 0, 1826 0, 1827 /*30*/ MatSetUpPreallocation_MPIAIJ, 1828 #ifdef PETSC_HAVE_PBGL 1829 MatILUFactorSymbolic_MPIAIJ, 1830 #else 1831 0, 1832 #endif 1833 0, 1834 0, 1835 0, 1836 /*35*/ MatDuplicate_MPIAIJ, 1837 0, 1838 0, 1839 0, 1840 0, 1841 /*40*/ MatAXPY_MPIAIJ, 1842 MatGetSubMatrices_MPIAIJ, 1843 MatIncreaseOverlap_MPIAIJ, 1844 MatGetValues_MPIAIJ, 1845 MatCopy_MPIAIJ, 1846 /*45*/ 0, 1847 MatScale_MPIAIJ, 1848 0, 1849 0, 1850 0, 1851 /*50*/ MatSetBlockSize_MPIAIJ, 1852 0, 1853 0, 1854 0, 1855 0, 1856 /*55*/ MatFDColoringCreate_MPIAIJ, 1857 0, 1858 MatSetUnfactored_MPIAIJ, 1859 MatPermute_MPIAIJ, 1860 0, 1861 /*60*/ MatGetSubMatrix_MPIAIJ, 1862 MatDestroy_MPIAIJ, 1863 MatView_MPIAIJ, 1864 0, 1865 0, 1866 /*65*/ 0, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /*70*/ 0, 1872 0, 1873 MatSetColoring_MPIAIJ, 1874 #if defined(PETSC_HAVE_ADIC) 1875 MatSetValuesAdic_MPIAIJ, 1876 #else 1877 0, 1878 #endif 1879 MatSetValuesAdifor_MPIAIJ, 1880 /*75*/ 0, 1881 0, 1882 0, 1883 0, 1884 0, 1885 /*80*/ 0, 1886 0, 1887 0, 1888 0, 1889 /*84*/ MatLoad_MPIAIJ, 1890 0, 1891 0, 1892 0, 1893 0, 1894 0, 1895 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1896 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1897 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1898 MatPtAP_Basic, 1899 MatPtAPSymbolic_MPIAIJ, 1900 /*95*/ MatPtAPNumeric_MPIAIJ, 1901 0, 1902 0, 1903 0, 1904 0, 1905 /*100*/0, 1906 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1907 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1908 MatConjugate_MPIAIJ, 1909 0, 1910 /*105*/MatSetValuesRow_MPIAIJ, 1911 MatRealPart_MPIAIJ, 1912 MatImaginaryPart_MPIAIJ}; 1913 1914 /* ----------------------------------------------------------------------------------------*/ 1915 1916 EXTERN_C_BEGIN 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1919 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1920 { 1921 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1926 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 EXTERN_C_END 1930 1931 EXTERN_C_BEGIN 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1934 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1935 { 1936 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1941 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 EXTERN_C_END 1945 1946 #include "petscpc.h" 1947 EXTERN_C_BEGIN 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1950 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1951 { 1952 Mat_MPIAIJ *b; 1953 PetscErrorCode ierr; 1954 PetscInt i; 1955 1956 PetscFunctionBegin; 1957 B->preallocated = PETSC_TRUE; 1958 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1959 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1960 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1961 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1962 1963 B->rmap.bs = B->cmap.bs = 1; 1964 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1965 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1966 if (d_nnz) { 1967 for (i=0; i<B->rmap.n; i++) { 1968 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1969 } 1970 } 1971 if (o_nnz) { 1972 for (i=0; i<B->rmap.n; i++) { 1973 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1974 } 1975 } 1976 b = (Mat_MPIAIJ*)B->data; 1977 1978 /* Explicitly create 2 MATSEQAIJ matrices. */ 1979 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1980 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1981 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1982 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1983 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1984 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1985 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1986 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1987 1988 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1989 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1990 1991 PetscFunctionReturn(0); 1992 } 1993 EXTERN_C_END 1994 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1997 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1998 { 1999 Mat mat; 2000 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2001 PetscErrorCode ierr; 2002 2003 PetscFunctionBegin; 2004 *newmat = 0; 2005 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2006 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2007 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2008 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2009 a = (Mat_MPIAIJ*)mat->data; 2010 2011 mat->factor = matin->factor; 2012 mat->rmap.bs = matin->rmap.bs; 2013 mat->assembled = PETSC_TRUE; 2014 mat->insertmode = NOT_SET_VALUES; 2015 mat->preallocated = PETSC_TRUE; 2016 2017 a->size = oldmat->size; 2018 a->rank = oldmat->rank; 2019 a->donotstash = oldmat->donotstash; 2020 a->roworiented = oldmat->roworiented; 2021 a->rowindices = 0; 2022 a->rowvalues = 0; 2023 a->getrowactive = PETSC_FALSE; 2024 2025 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2026 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2027 2028 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2029 if (oldmat->colmap) { 2030 #if defined (PETSC_USE_CTABLE) 2031 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2032 #else 2033 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2034 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2035 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2036 #endif 2037 } else a->colmap = 0; 2038 if (oldmat->garray) { 2039 PetscInt len; 2040 len = oldmat->B->cmap.n; 2041 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2042 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2043 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2044 } else a->garray = 0; 2045 2046 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2047 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2048 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2049 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2050 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2051 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2052 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2053 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2054 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2055 *newmat = mat; 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #include "petscsys.h" 2060 2061 #undef __FUNCT__ 2062 #define __FUNCT__ "MatLoad_MPIAIJ" 2063 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2064 { 2065 Mat A; 2066 PetscScalar *vals,*svals; 2067 MPI_Comm comm = ((PetscObject)viewer)->comm; 2068 MPI_Status status; 2069 PetscErrorCode ierr; 2070 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2071 PetscInt i,nz,j,rstart,rend,mmax; 2072 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2073 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2074 PetscInt cend,cstart,n,*rowners; 2075 int fd; 2076 2077 PetscFunctionBegin; 2078 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2079 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2080 if (!rank) { 2081 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2082 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2083 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2084 } 2085 2086 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2087 M = header[1]; N = header[2]; 2088 /* determine ownership of all rows */ 2089 m = M/size + ((M % size) > rank); 2090 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2091 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2092 2093 /* First process needs enough room for process with most rows */ 2094 if (!rank) { 2095 mmax = rowners[1]; 2096 for (i=2; i<size; i++) { 2097 mmax = PetscMax(mmax,rowners[i]); 2098 } 2099 } else mmax = m; 2100 2101 rowners[0] = 0; 2102 for (i=2; i<=size; i++) { 2103 mmax = PetscMax(mmax,rowners[i]); 2104 rowners[i] += rowners[i-1]; 2105 } 2106 rstart = rowners[rank]; 2107 rend = rowners[rank+1]; 2108 2109 /* distribute row lengths to all processors */ 2110 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2111 if (!rank) { 2112 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2113 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2114 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2115 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2116 for (j=0; j<m; j++) { 2117 procsnz[0] += ourlens[j]; 2118 } 2119 for (i=1; i<size; i++) { 2120 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2121 /* calculate the number of nonzeros on each processor */ 2122 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2123 procsnz[i] += rowlengths[j]; 2124 } 2125 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2126 } 2127 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2128 } else { 2129 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2130 } 2131 2132 if (!rank) { 2133 /* determine max buffer needed and allocate it */ 2134 maxnz = 0; 2135 for (i=0; i<size; i++) { 2136 maxnz = PetscMax(maxnz,procsnz[i]); 2137 } 2138 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2139 2140 /* read in my part of the matrix column indices */ 2141 nz = procsnz[0]; 2142 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2143 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2144 2145 /* read in every one elses and ship off */ 2146 for (i=1; i<size; i++) { 2147 nz = procsnz[i]; 2148 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2149 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2150 } 2151 ierr = PetscFree(cols);CHKERRQ(ierr); 2152 } else { 2153 /* determine buffer space needed for message */ 2154 nz = 0; 2155 for (i=0; i<m; i++) { 2156 nz += ourlens[i]; 2157 } 2158 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2159 2160 /* receive message of column indices*/ 2161 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2162 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2163 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2164 } 2165 2166 /* determine column ownership if matrix is not square */ 2167 if (N != M) { 2168 n = N/size + ((N % size) > rank); 2169 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2170 cstart = cend - n; 2171 } else { 2172 cstart = rstart; 2173 cend = rend; 2174 n = cend - cstart; 2175 } 2176 2177 /* loop over local rows, determining number of off diagonal entries */ 2178 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2179 jj = 0; 2180 for (i=0; i<m; i++) { 2181 for (j=0; j<ourlens[i]; j++) { 2182 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2183 jj++; 2184 } 2185 } 2186 2187 /* create our matrix */ 2188 for (i=0; i<m; i++) { 2189 ourlens[i] -= offlens[i]; 2190 } 2191 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2192 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2193 ierr = MatSetType(A,type);CHKERRQ(ierr); 2194 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2195 2196 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2197 for (i=0; i<m; i++) { 2198 ourlens[i] += offlens[i]; 2199 } 2200 2201 if (!rank) { 2202 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2203 2204 /* read in my part of the matrix numerical values */ 2205 nz = procsnz[0]; 2206 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2207 2208 /* insert into matrix */ 2209 jj = rstart; 2210 smycols = mycols; 2211 svals = vals; 2212 for (i=0; i<m; i++) { 2213 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2214 smycols += ourlens[i]; 2215 svals += ourlens[i]; 2216 jj++; 2217 } 2218 2219 /* read in other processors and ship out */ 2220 for (i=1; i<size; i++) { 2221 nz = procsnz[i]; 2222 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2223 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2224 } 2225 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2226 } else { 2227 /* receive numeric values */ 2228 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2229 2230 /* receive message of values*/ 2231 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2232 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2233 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2234 2235 /* insert into matrix */ 2236 jj = rstart; 2237 smycols = mycols; 2238 svals = vals; 2239 for (i=0; i<m; i++) { 2240 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2241 smycols += ourlens[i]; 2242 svals += ourlens[i]; 2243 jj++; 2244 } 2245 } 2246 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2247 ierr = PetscFree(vals);CHKERRQ(ierr); 2248 ierr = PetscFree(mycols);CHKERRQ(ierr); 2249 ierr = PetscFree(rowners);CHKERRQ(ierr); 2250 2251 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2252 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2253 *newmat = A; 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2259 /* 2260 Not great since it makes two copies of the submatrix, first an SeqAIJ 2261 in local and then by concatenating the local matrices the end result. 2262 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2263 */ 2264 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2265 { 2266 PetscErrorCode ierr; 2267 PetscMPIInt rank,size; 2268 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2269 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2270 Mat *local,M,Mreuse; 2271 PetscScalar *vwork,*aa; 2272 MPI_Comm comm = mat->comm; 2273 Mat_SeqAIJ *aij; 2274 2275 2276 PetscFunctionBegin; 2277 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2278 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2279 2280 if (call == MAT_REUSE_MATRIX) { 2281 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2282 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2283 local = &Mreuse; 2284 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2285 } else { 2286 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2287 Mreuse = *local; 2288 ierr = PetscFree(local);CHKERRQ(ierr); 2289 } 2290 2291 /* 2292 m - number of local rows 2293 n - number of columns (same on all processors) 2294 rstart - first row in new global matrix generated 2295 */ 2296 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2297 if (call == MAT_INITIAL_MATRIX) { 2298 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2299 ii = aij->i; 2300 jj = aij->j; 2301 2302 /* 2303 Determine the number of non-zeros in the diagonal and off-diagonal 2304 portions of the matrix in order to do correct preallocation 2305 */ 2306 2307 /* first get start and end of "diagonal" columns */ 2308 if (csize == PETSC_DECIDE) { 2309 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2310 if (mglobal == n) { /* square matrix */ 2311 nlocal = m; 2312 } else { 2313 nlocal = n/size + ((n % size) > rank); 2314 } 2315 } else { 2316 nlocal = csize; 2317 } 2318 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2319 rstart = rend - nlocal; 2320 if (rank == size - 1 && rend != n) { 2321 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2322 } 2323 2324 /* next, compute all the lengths */ 2325 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2326 olens = dlens + m; 2327 for (i=0; i<m; i++) { 2328 jend = ii[i+1] - ii[i]; 2329 olen = 0; 2330 dlen = 0; 2331 for (j=0; j<jend; j++) { 2332 if (*jj < rstart || *jj >= rend) olen++; 2333 else dlen++; 2334 jj++; 2335 } 2336 olens[i] = olen; 2337 dlens[i] = dlen; 2338 } 2339 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2340 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2341 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2343 ierr = PetscFree(dlens);CHKERRQ(ierr); 2344 } else { 2345 PetscInt ml,nl; 2346 2347 M = *newmat; 2348 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2349 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2350 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2351 /* 2352 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2353 rather than the slower MatSetValues(). 2354 */ 2355 M->was_assembled = PETSC_TRUE; 2356 M->assembled = PETSC_FALSE; 2357 } 2358 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2359 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2360 ii = aij->i; 2361 jj = aij->j; 2362 aa = aij->a; 2363 for (i=0; i<m; i++) { 2364 row = rstart + i; 2365 nz = ii[i+1] - ii[i]; 2366 cwork = jj; jj += nz; 2367 vwork = aa; aa += nz; 2368 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2369 } 2370 2371 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2373 *newmat = M; 2374 2375 /* save submatrix used in processor for next request */ 2376 if (call == MAT_INITIAL_MATRIX) { 2377 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2378 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2379 } 2380 2381 PetscFunctionReturn(0); 2382 } 2383 2384 EXTERN_C_BEGIN 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2387 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2388 { 2389 PetscInt m,cstart, cend,j,nnz,i,d; 2390 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2391 const PetscInt *JJ; 2392 PetscScalar *values; 2393 PetscErrorCode ierr; 2394 2395 PetscFunctionBegin; 2396 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2397 2398 B->rmap.bs = B->cmap.bs = 1; 2399 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2400 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2401 m = B->rmap.n; 2402 cstart = B->cmap.rstart; 2403 cend = B->cmap.rend; 2404 rstart = B->rmap.rstart; 2405 2406 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2407 o_nnz = d_nnz + m; 2408 2409 for (i=0; i<m; i++) { 2410 nnz = Ii[i+1]- Ii[i]; 2411 JJ = J + Ii[i]; 2412 nnz_max = PetscMax(nnz_max,nnz); 2413 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2414 for (j=0; j<nnz; j++) { 2415 if (*JJ >= cstart) break; 2416 JJ++; 2417 } 2418 d = 0; 2419 for (; j<nnz; j++) { 2420 if (*JJ++ >= cend) break; 2421 d++; 2422 } 2423 d_nnz[i] = d; 2424 o_nnz[i] = nnz - d; 2425 } 2426 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2427 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2428 2429 if (v) values = (PetscScalar*)v; 2430 else { 2431 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2432 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2433 } 2434 2435 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2436 for (i=0; i<m; i++) { 2437 ii = i + rstart; 2438 nnz = Ii[i+1]- Ii[i]; 2439 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2440 } 2441 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2442 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2443 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2444 2445 if (!v) { 2446 ierr = PetscFree(values);CHKERRQ(ierr); 2447 } 2448 PetscFunctionReturn(0); 2449 } 2450 EXTERN_C_END 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2454 /*@ 2455 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2456 (the default parallel PETSc format). 2457 2458 Collective on MPI_Comm 2459 2460 Input Parameters: 2461 + B - the matrix 2462 . i - the indices into j for the start of each local row (starts with zero) 2463 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2464 - v - optional values in the matrix 2465 2466 Level: developer 2467 2468 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2469 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2470 2471 .keywords: matrix, aij, compressed row, sparse, parallel 2472 2473 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2474 MatCreateSeqAIJWithArrays() 2475 @*/ 2476 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2477 { 2478 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2479 2480 PetscFunctionBegin; 2481 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2482 if (f) { 2483 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2484 } 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2490 /*@C 2491 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2492 (the default parallel PETSc format). For good matrix assembly performance 2493 the user should preallocate the matrix storage by setting the parameters 2494 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2495 performance can be increased by more than a factor of 50. 2496 2497 Collective on MPI_Comm 2498 2499 Input Parameters: 2500 + A - the matrix 2501 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2502 (same value is used for all local rows) 2503 . d_nnz - array containing the number of nonzeros in the various rows of the 2504 DIAGONAL portion of the local submatrix (possibly different for each row) 2505 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2506 The size of this array is equal to the number of local rows, i.e 'm'. 2507 You must leave room for the diagonal entry even if it is zero. 2508 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2509 submatrix (same value is used for all local rows). 2510 - o_nnz - array containing the number of nonzeros in the various rows of the 2511 OFF-DIAGONAL portion of the local submatrix (possibly different for 2512 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2513 structure. The size of this array is equal to the number 2514 of local rows, i.e 'm'. 2515 2516 If the *_nnz parameter is given then the *_nz parameter is ignored 2517 2518 The AIJ format (also called the Yale sparse matrix format or 2519 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2520 storage. The stored row and column indices begin with zero. See the users manual for details. 2521 2522 The parallel matrix is partitioned such that the first m0 rows belong to 2523 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2524 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2525 2526 The DIAGONAL portion of the local submatrix of a processor can be defined 2527 as the submatrix which is obtained by extraction the part corresponding 2528 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2529 first row that belongs to the processor, and r2 is the last row belonging 2530 to the this processor. This is a square mxm matrix. The remaining portion 2531 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2532 2533 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2534 2535 Example usage: 2536 2537 Consider the following 8x8 matrix with 34 non-zero values, that is 2538 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2539 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2540 as follows: 2541 2542 .vb 2543 1 2 0 | 0 3 0 | 0 4 2544 Proc0 0 5 6 | 7 0 0 | 8 0 2545 9 0 10 | 11 0 0 | 12 0 2546 ------------------------------------- 2547 13 0 14 | 15 16 17 | 0 0 2548 Proc1 0 18 0 | 19 20 21 | 0 0 2549 0 0 0 | 22 23 0 | 24 0 2550 ------------------------------------- 2551 Proc2 25 26 27 | 0 0 28 | 29 0 2552 30 0 0 | 31 32 33 | 0 34 2553 .ve 2554 2555 This can be represented as a collection of submatrices as: 2556 2557 .vb 2558 A B C 2559 D E F 2560 G H I 2561 .ve 2562 2563 Where the submatrices A,B,C are owned by proc0, D,E,F are 2564 owned by proc1, G,H,I are owned by proc2. 2565 2566 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2567 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2568 The 'M','N' parameters are 8,8, and have the same values on all procs. 2569 2570 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2571 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2572 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2573 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2574 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2575 matrix, ans [DF] as another SeqAIJ matrix. 2576 2577 When d_nz, o_nz parameters are specified, d_nz storage elements are 2578 allocated for every row of the local diagonal submatrix, and o_nz 2579 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2580 One way to choose d_nz and o_nz is to use the max nonzerors per local 2581 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2582 In this case, the values of d_nz,o_nz are: 2583 .vb 2584 proc0 : dnz = 2, o_nz = 2 2585 proc1 : dnz = 3, o_nz = 2 2586 proc2 : dnz = 1, o_nz = 4 2587 .ve 2588 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2589 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2590 for proc3. i.e we are using 12+15+10=37 storage locations to store 2591 34 values. 2592 2593 When d_nnz, o_nnz parameters are specified, the storage is specified 2594 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2595 In the above case the values for d_nnz,o_nnz are: 2596 .vb 2597 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2598 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2599 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2600 .ve 2601 Here the space allocated is sum of all the above values i.e 34, and 2602 hence pre-allocation is perfect. 2603 2604 Level: intermediate 2605 2606 .keywords: matrix, aij, compressed row, sparse, parallel 2607 2608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2609 MPIAIJ 2610 @*/ 2611 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2612 { 2613 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2614 2615 PetscFunctionBegin; 2616 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2617 if (f) { 2618 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2619 } 2620 PetscFunctionReturn(0); 2621 } 2622 2623 #undef __FUNCT__ 2624 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2625 /*@C 2626 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 2627 CSR format the local rows. 2628 2629 Collective on MPI_Comm 2630 2631 Input Parameters: 2632 + comm - MPI communicator 2633 . m - number of local rows (Cannot be PETSC_DECIDE) 2634 . n - This value should be the same as the local size used in creating the 2635 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2636 calculated if N is given) For square matrices n is almost always m. 2637 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2638 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2639 . i - row indices 2640 . j - column indices 2641 - a - matrix values 2642 2643 Output Parameter: 2644 . mat - the matrix 2645 Level: intermediate 2646 2647 Notes: 2648 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2649 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2650 called this routine. 2651 2652 The i and j indices are 0 based 2653 2654 .keywords: matrix, aij, compressed row, sparse, parallel 2655 2656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2657 MPIAIJ, MatCreateMPIAIJ() 2658 @*/ 2659 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2660 { 2661 PetscErrorCode ierr; 2662 2663 PetscFunctionBegin; 2664 if (i[0]) { 2665 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2666 } 2667 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2668 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2669 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2670 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatCreateMPIAIJ" 2676 /*@C 2677 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2678 (the default parallel PETSc format). For good matrix assembly performance 2679 the user should preallocate the matrix storage by setting the parameters 2680 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2681 performance can be increased by more than a factor of 50. 2682 2683 Collective on MPI_Comm 2684 2685 Input Parameters: 2686 + comm - MPI communicator 2687 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2688 This value should be the same as the local size used in creating the 2689 y vector for the matrix-vector product y = Ax. 2690 . n - This value should be the same as the local size used in creating the 2691 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2692 calculated if N is given) For square matrices n is almost always m. 2693 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2694 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2695 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2696 (same value is used for all local rows) 2697 . d_nnz - array containing the number of nonzeros in the various rows of the 2698 DIAGONAL portion of the local submatrix (possibly different for each row) 2699 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2700 The size of this array is equal to the number of local rows, i.e 'm'. 2701 You must leave room for the diagonal entry even if it is zero. 2702 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2703 submatrix (same value is used for all local rows). 2704 - o_nnz - array containing the number of nonzeros in the various rows of the 2705 OFF-DIAGONAL portion of the local submatrix (possibly different for 2706 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2707 structure. The size of this array is equal to the number 2708 of local rows, i.e 'm'. 2709 2710 Output Parameter: 2711 . A - the matrix 2712 2713 Notes: 2714 If the *_nnz parameter is given then the *_nz parameter is ignored 2715 2716 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2717 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2718 storage requirements for this matrix. 2719 2720 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2721 processor than it must be used on all processors that share the object for 2722 that argument. 2723 2724 The user MUST specify either the local or global matrix dimensions 2725 (possibly both). 2726 2727 The parallel matrix is partitioned such that the first m0 rows belong to 2728 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2729 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2730 2731 The DIAGONAL portion of the local submatrix of a processor can be defined 2732 as the submatrix which is obtained by extraction the part corresponding 2733 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2734 first row that belongs to the processor, and r2 is the last row belonging 2735 to the this processor. This is a square mxm matrix. The remaining portion 2736 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2737 2738 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2739 2740 When calling this routine with a single process communicator, a matrix of 2741 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2742 type of communicator, use the construction mechanism: 2743 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2744 2745 By default, this format uses inodes (identical nodes) when possible. 2746 We search for consecutive rows with the same nonzero structure, thereby 2747 reusing matrix information to achieve increased efficiency. 2748 2749 Options Database Keys: 2750 + -mat_no_inode - Do not use inodes 2751 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2752 - -mat_aij_oneindex - Internally use indexing starting at 1 2753 rather than 0. Note that when calling MatSetValues(), 2754 the user still MUST index entries starting at 0! 2755 2756 2757 Example usage: 2758 2759 Consider the following 8x8 matrix with 34 non-zero values, that is 2760 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2761 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2762 as follows: 2763 2764 .vb 2765 1 2 0 | 0 3 0 | 0 4 2766 Proc0 0 5 6 | 7 0 0 | 8 0 2767 9 0 10 | 11 0 0 | 12 0 2768 ------------------------------------- 2769 13 0 14 | 15 16 17 | 0 0 2770 Proc1 0 18 0 | 19 20 21 | 0 0 2771 0 0 0 | 22 23 0 | 24 0 2772 ------------------------------------- 2773 Proc2 25 26 27 | 0 0 28 | 29 0 2774 30 0 0 | 31 32 33 | 0 34 2775 .ve 2776 2777 This can be represented as a collection of submatrices as: 2778 2779 .vb 2780 A B C 2781 D E F 2782 G H I 2783 .ve 2784 2785 Where the submatrices A,B,C are owned by proc0, D,E,F are 2786 owned by proc1, G,H,I are owned by proc2. 2787 2788 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2789 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2790 The 'M','N' parameters are 8,8, and have the same values on all procs. 2791 2792 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2793 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2794 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2795 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2796 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2797 matrix, ans [DF] as another SeqAIJ matrix. 2798 2799 When d_nz, o_nz parameters are specified, d_nz storage elements are 2800 allocated for every row of the local diagonal submatrix, and o_nz 2801 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2802 One way to choose d_nz and o_nz is to use the max nonzerors per local 2803 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2804 In this case, the values of d_nz,o_nz are: 2805 .vb 2806 proc0 : dnz = 2, o_nz = 2 2807 proc1 : dnz = 3, o_nz = 2 2808 proc2 : dnz = 1, o_nz = 4 2809 .ve 2810 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2811 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2812 for proc3. i.e we are using 12+15+10=37 storage locations to store 2813 34 values. 2814 2815 When d_nnz, o_nnz parameters are specified, the storage is specified 2816 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2817 In the above case the values for d_nnz,o_nnz are: 2818 .vb 2819 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2820 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2821 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2822 .ve 2823 Here the space allocated is sum of all the above values i.e 34, and 2824 hence pre-allocation is perfect. 2825 2826 Level: intermediate 2827 2828 .keywords: matrix, aij, compressed row, sparse, parallel 2829 2830 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2831 MPIAIJ, MatCreateMPIAIJWithArrays() 2832 @*/ 2833 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2834 { 2835 PetscErrorCode ierr; 2836 PetscMPIInt size; 2837 2838 PetscFunctionBegin; 2839 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2840 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2841 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2842 if (size > 1) { 2843 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2844 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2845 } else { 2846 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2847 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2848 } 2849 PetscFunctionReturn(0); 2850 } 2851 2852 #undef __FUNCT__ 2853 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2855 { 2856 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2857 2858 PetscFunctionBegin; 2859 *Ad = a->A; 2860 *Ao = a->B; 2861 *colmap = a->garray; 2862 PetscFunctionReturn(0); 2863 } 2864 2865 #undef __FUNCT__ 2866 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2867 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2868 { 2869 PetscErrorCode ierr; 2870 PetscInt i; 2871 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2872 2873 PetscFunctionBegin; 2874 if (coloring->ctype == IS_COLORING_LOCAL) { 2875 ISColoringValue *allcolors,*colors; 2876 ISColoring ocoloring; 2877 2878 /* set coloring for diagonal portion */ 2879 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2880 2881 /* set coloring for off-diagonal portion */ 2882 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2883 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2884 for (i=0; i<a->B->cmap.n; i++) { 2885 colors[i] = allcolors[a->garray[i]]; 2886 } 2887 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2888 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2889 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2890 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2891 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2892 ISColoringValue *colors; 2893 PetscInt *larray; 2894 ISColoring ocoloring; 2895 2896 /* set coloring for diagonal portion */ 2897 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2898 for (i=0; i<a->A->cmap.n; i++) { 2899 larray[i] = i + A->cmap.rstart; 2900 } 2901 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2902 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2903 for (i=0; i<a->A->cmap.n; i++) { 2904 colors[i] = coloring->colors[larray[i]]; 2905 } 2906 ierr = PetscFree(larray);CHKERRQ(ierr); 2907 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2908 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2909 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2910 2911 /* set coloring for off-diagonal portion */ 2912 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2913 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2914 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2915 for (i=0; i<a->B->cmap.n; i++) { 2916 colors[i] = coloring->colors[larray[i]]; 2917 } 2918 ierr = PetscFree(larray);CHKERRQ(ierr); 2919 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2920 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2921 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2922 } else { 2923 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2924 } 2925 2926 PetscFunctionReturn(0); 2927 } 2928 2929 #if defined(PETSC_HAVE_ADIC) 2930 #undef __FUNCT__ 2931 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2932 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2933 { 2934 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2935 PetscErrorCode ierr; 2936 2937 PetscFunctionBegin; 2938 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2939 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2940 PetscFunctionReturn(0); 2941 } 2942 #endif 2943 2944 #undef __FUNCT__ 2945 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2946 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2947 { 2948 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2949 PetscErrorCode ierr; 2950 2951 PetscFunctionBegin; 2952 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2953 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatMerge" 2959 /*@C 2960 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2961 matrices from each processor 2962 2963 Collective on MPI_Comm 2964 2965 Input Parameters: 2966 + comm - the communicators the parallel matrix will live on 2967 . inmat - the input sequential matrices 2968 . n - number of local columns (or PETSC_DECIDE) 2969 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2970 2971 Output Parameter: 2972 . outmat - the parallel matrix generated 2973 2974 Level: advanced 2975 2976 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2977 2978 @*/ 2979 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2980 { 2981 PetscErrorCode ierr; 2982 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 2983 PetscInt *indx; 2984 PetscScalar *values; 2985 2986 PetscFunctionBegin; 2987 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2988 if (scall == MAT_INITIAL_MATRIX){ 2989 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2990 if (n == PETSC_DECIDE){ 2991 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2992 } 2993 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2994 rstart -= m; 2995 2996 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2997 for (i=0;i<m;i++) { 2998 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2999 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3000 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3001 } 3002 /* This routine will ONLY return MPIAIJ type matrix */ 3003 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3004 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3005 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3006 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3007 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3008 3009 } else if (scall == MAT_REUSE_MATRIX){ 3010 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3011 } else { 3012 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3013 } 3014 3015 for (i=0;i<m;i++) { 3016 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3017 Ii = i + rstart; 3018 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3019 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3020 } 3021 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3022 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3023 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatFileSplit" 3030 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3031 { 3032 PetscErrorCode ierr; 3033 PetscMPIInt rank; 3034 PetscInt m,N,i,rstart,nnz; 3035 size_t len; 3036 const PetscInt *indx; 3037 PetscViewer out; 3038 char *name; 3039 Mat B; 3040 const PetscScalar *values; 3041 3042 PetscFunctionBegin; 3043 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3044 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3045 /* Should this be the type of the diagonal block of A? */ 3046 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3047 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3048 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3049 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3050 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3051 for (i=0;i<m;i++) { 3052 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3053 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3054 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3055 } 3056 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3058 3059 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3060 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3061 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3062 sprintf(name,"%s.%d",outfile,rank); 3063 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3064 ierr = PetscFree(name); 3065 ierr = MatView(B,out);CHKERRQ(ierr); 3066 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3067 ierr = MatDestroy(B);CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3074 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3075 { 3076 PetscErrorCode ierr; 3077 Mat_Merge_SeqsToMPI *merge; 3078 PetscObjectContainer container; 3079 3080 PetscFunctionBegin; 3081 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3082 if (container) { 3083 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3084 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3085 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3086 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3087 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3088 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3089 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3090 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3091 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3092 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3093 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3094 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3095 3096 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 3097 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3098 } 3099 ierr = PetscFree(merge);CHKERRQ(ierr); 3100 3101 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3102 PetscFunctionReturn(0); 3103 } 3104 3105 #include "src/mat/utils/freespace.h" 3106 #include "petscbt.h" 3107 static PetscEvent logkey_seqstompinum = 0; 3108 #undef __FUNCT__ 3109 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3110 /*@C 3111 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3112 matrices from each processor 3113 3114 Collective on MPI_Comm 3115 3116 Input Parameters: 3117 + comm - the communicators the parallel matrix will live on 3118 . seqmat - the input sequential matrices 3119 . m - number of local rows (or PETSC_DECIDE) 3120 . n - number of local columns (or PETSC_DECIDE) 3121 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3122 3123 Output Parameter: 3124 . mpimat - the parallel matrix generated 3125 3126 Level: advanced 3127 3128 Notes: 3129 The dimensions of the sequential matrix in each processor MUST be the same. 3130 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3131 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3132 @*/ 3133 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3134 { 3135 PetscErrorCode ierr; 3136 MPI_Comm comm=mpimat->comm; 3137 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3138 PetscMPIInt size,rank,taga,*len_s; 3139 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3140 PetscInt proc,m; 3141 PetscInt **buf_ri,**buf_rj; 3142 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3143 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3144 MPI_Request *s_waits,*r_waits; 3145 MPI_Status *status; 3146 MatScalar *aa=a->a,**abuf_r,*ba_i; 3147 Mat_Merge_SeqsToMPI *merge; 3148 PetscObjectContainer container; 3149 3150 PetscFunctionBegin; 3151 if (!logkey_seqstompinum) { 3152 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3153 } 3154 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3155 3156 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3157 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3158 3159 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3160 if (container) { 3161 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3162 } 3163 bi = merge->bi; 3164 bj = merge->bj; 3165 buf_ri = merge->buf_ri; 3166 buf_rj = merge->buf_rj; 3167 3168 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3169 owners = merge->rowmap.range; 3170 len_s = merge->len_s; 3171 3172 /* send and recv matrix values */ 3173 /*-----------------------------*/ 3174 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3175 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3176 3177 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3178 for (proc=0,k=0; proc<size; proc++){ 3179 if (!len_s[proc]) continue; 3180 i = owners[proc]; 3181 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3182 k++; 3183 } 3184 3185 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3186 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3187 ierr = PetscFree(status);CHKERRQ(ierr); 3188 3189 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3190 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3191 3192 /* insert mat values of mpimat */ 3193 /*----------------------------*/ 3194 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3195 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3196 nextrow = buf_ri_k + merge->nrecv; 3197 nextai = nextrow + merge->nrecv; 3198 3199 for (k=0; k<merge->nrecv; k++){ 3200 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3201 nrows = *(buf_ri_k[k]); 3202 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3203 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3204 } 3205 3206 /* set values of ba */ 3207 m = merge->rowmap.n; 3208 for (i=0; i<m; i++) { 3209 arow = owners[rank] + i; 3210 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3211 bnzi = bi[i+1] - bi[i]; 3212 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3213 3214 /* add local non-zero vals of this proc's seqmat into ba */ 3215 anzi = ai[arow+1] - ai[arow]; 3216 aj = a->j + ai[arow]; 3217 aa = a->a + ai[arow]; 3218 nextaj = 0; 3219 for (j=0; nextaj<anzi; j++){ 3220 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3221 ba_i[j] += aa[nextaj++]; 3222 } 3223 } 3224 3225 /* add received vals into ba */ 3226 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3227 /* i-th row */ 3228 if (i == *nextrow[k]) { 3229 anzi = *(nextai[k]+1) - *nextai[k]; 3230 aj = buf_rj[k] + *(nextai[k]); 3231 aa = abuf_r[k] + *(nextai[k]); 3232 nextaj = 0; 3233 for (j=0; nextaj<anzi; j++){ 3234 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3235 ba_i[j] += aa[nextaj++]; 3236 } 3237 } 3238 nextrow[k]++; nextai[k]++; 3239 } 3240 } 3241 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3242 } 3243 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3244 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3245 3246 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3247 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3248 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3249 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3250 PetscFunctionReturn(0); 3251 } 3252 3253 static PetscEvent logkey_seqstompisym = 0; 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3256 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3257 { 3258 PetscErrorCode ierr; 3259 Mat B_mpi; 3260 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3261 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3262 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3263 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3264 PetscInt len,proc,*dnz,*onz; 3265 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3266 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3267 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3268 MPI_Status *status; 3269 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3270 PetscBT lnkbt; 3271 Mat_Merge_SeqsToMPI *merge; 3272 PetscObjectContainer container; 3273 3274 PetscFunctionBegin; 3275 if (!logkey_seqstompisym) { 3276 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3277 } 3278 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3279 3280 /* make sure it is a PETSc comm */ 3281 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3282 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3283 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3284 3285 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3286 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3287 3288 /* determine row ownership */ 3289 /*---------------------------------------------------------*/ 3290 merge->rowmap.n = m; 3291 merge->rowmap.N = M; 3292 merge->rowmap.bs = 1; 3293 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3294 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3295 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3296 3297 m = merge->rowmap.n; 3298 M = merge->rowmap.N; 3299 owners = merge->rowmap.range; 3300 3301 /* determine the number of messages to send, their lengths */ 3302 /*---------------------------------------------------------*/ 3303 len_s = merge->len_s; 3304 3305 len = 0; /* length of buf_si[] */ 3306 merge->nsend = 0; 3307 for (proc=0; proc<size; proc++){ 3308 len_si[proc] = 0; 3309 if (proc == rank){ 3310 len_s[proc] = 0; 3311 } else { 3312 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3313 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3314 } 3315 if (len_s[proc]) { 3316 merge->nsend++; 3317 nrows = 0; 3318 for (i=owners[proc]; i<owners[proc+1]; i++){ 3319 if (ai[i+1] > ai[i]) nrows++; 3320 } 3321 len_si[proc] = 2*(nrows+1); 3322 len += len_si[proc]; 3323 } 3324 } 3325 3326 /* determine the number and length of messages to receive for ij-structure */ 3327 /*-------------------------------------------------------------------------*/ 3328 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3329 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3330 3331 /* post the Irecv of j-structure */ 3332 /*-------------------------------*/ 3333 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3334 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3335 3336 /* post the Isend of j-structure */ 3337 /*--------------------------------*/ 3338 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3339 sj_waits = si_waits + merge->nsend; 3340 3341 for (proc=0, k=0; proc<size; proc++){ 3342 if (!len_s[proc]) continue; 3343 i = owners[proc]; 3344 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3345 k++; 3346 } 3347 3348 /* receives and sends of j-structure are complete */ 3349 /*------------------------------------------------*/ 3350 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3351 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3352 3353 /* send and recv i-structure */ 3354 /*---------------------------*/ 3355 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3356 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3357 3358 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3359 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3360 for (proc=0,k=0; proc<size; proc++){ 3361 if (!len_s[proc]) continue; 3362 /* form outgoing message for i-structure: 3363 buf_si[0]: nrows to be sent 3364 [1:nrows]: row index (global) 3365 [nrows+1:2*nrows+1]: i-structure index 3366 */ 3367 /*-------------------------------------------*/ 3368 nrows = len_si[proc]/2 - 1; 3369 buf_si_i = buf_si + nrows+1; 3370 buf_si[0] = nrows; 3371 buf_si_i[0] = 0; 3372 nrows = 0; 3373 for (i=owners[proc]; i<owners[proc+1]; i++){ 3374 anzi = ai[i+1] - ai[i]; 3375 if (anzi) { 3376 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3377 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3378 nrows++; 3379 } 3380 } 3381 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3382 k++; 3383 buf_si += len_si[proc]; 3384 } 3385 3386 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3387 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3388 3389 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3390 for (i=0; i<merge->nrecv; i++){ 3391 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3392 } 3393 3394 ierr = PetscFree(len_si);CHKERRQ(ierr); 3395 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3396 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3397 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3398 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3399 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3400 ierr = PetscFree(status);CHKERRQ(ierr); 3401 3402 /* compute a local seq matrix in each processor */ 3403 /*----------------------------------------------*/ 3404 /* allocate bi array and free space for accumulating nonzero column info */ 3405 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3406 bi[0] = 0; 3407 3408 /* create and initialize a linked list */ 3409 nlnk = N+1; 3410 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3411 3412 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3413 len = 0; 3414 len = ai[owners[rank+1]] - ai[owners[rank]]; 3415 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3416 current_space = free_space; 3417 3418 /* determine symbolic info for each local row */ 3419 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3420 nextrow = buf_ri_k + merge->nrecv; 3421 nextai = nextrow + merge->nrecv; 3422 for (k=0; k<merge->nrecv; k++){ 3423 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3424 nrows = *buf_ri_k[k]; 3425 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3426 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3427 } 3428 3429 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3430 len = 0; 3431 for (i=0;i<m;i++) { 3432 bnzi = 0; 3433 /* add local non-zero cols of this proc's seqmat into lnk */ 3434 arow = owners[rank] + i; 3435 anzi = ai[arow+1] - ai[arow]; 3436 aj = a->j + ai[arow]; 3437 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3438 bnzi += nlnk; 3439 /* add received col data into lnk */ 3440 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3441 if (i == *nextrow[k]) { /* i-th row */ 3442 anzi = *(nextai[k]+1) - *nextai[k]; 3443 aj = buf_rj[k] + *nextai[k]; 3444 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3445 bnzi += nlnk; 3446 nextrow[k]++; nextai[k]++; 3447 } 3448 } 3449 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3450 3451 /* if free space is not available, make more free space */ 3452 if (current_space->local_remaining<bnzi) { 3453 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3454 nspacedouble++; 3455 } 3456 /* copy data into free space, then initialize lnk */ 3457 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3458 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3459 3460 current_space->array += bnzi; 3461 current_space->local_used += bnzi; 3462 current_space->local_remaining -= bnzi; 3463 3464 bi[i+1] = bi[i] + bnzi; 3465 } 3466 3467 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3468 3469 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3470 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3471 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3472 3473 /* create symbolic parallel matrix B_mpi */ 3474 /*---------------------------------------*/ 3475 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3476 if (n==PETSC_DECIDE) { 3477 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3478 } else { 3479 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3480 } 3481 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3482 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3483 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3484 3485 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3486 B_mpi->assembled = PETSC_FALSE; 3487 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3488 merge->bi = bi; 3489 merge->bj = bj; 3490 merge->buf_ri = buf_ri; 3491 merge->buf_rj = buf_rj; 3492 merge->coi = PETSC_NULL; 3493 merge->coj = PETSC_NULL; 3494 merge->owners_co = PETSC_NULL; 3495 3496 /* attach the supporting struct to B_mpi for reuse */ 3497 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3498 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3499 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3500 *mpimat = B_mpi; 3501 3502 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3503 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3504 PetscFunctionReturn(0); 3505 } 3506 3507 static PetscEvent logkey_seqstompi = 0; 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatMerge_SeqsToMPI" 3510 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3511 { 3512 PetscErrorCode ierr; 3513 3514 PetscFunctionBegin; 3515 if (!logkey_seqstompi) { 3516 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3517 } 3518 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3519 if (scall == MAT_INITIAL_MATRIX){ 3520 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3521 } 3522 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3523 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3524 PetscFunctionReturn(0); 3525 } 3526 static PetscEvent logkey_getlocalmat = 0; 3527 #undef __FUNCT__ 3528 #define __FUNCT__ "MatGetLocalMat" 3529 /*@C 3530 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3531 3532 Not Collective 3533 3534 Input Parameters: 3535 + A - the matrix 3536 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3537 3538 Output Parameter: 3539 . A_loc - the local sequential matrix generated 3540 3541 Level: developer 3542 3543 @*/ 3544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3545 { 3546 PetscErrorCode ierr; 3547 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3548 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3549 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3550 PetscScalar *aa=a->a,*ba=b->a,*ca; 3551 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3552 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3553 3554 PetscFunctionBegin; 3555 if (!logkey_getlocalmat) { 3556 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3557 } 3558 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3559 if (scall == MAT_INITIAL_MATRIX){ 3560 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3561 ci[0] = 0; 3562 for (i=0; i<am; i++){ 3563 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3564 } 3565 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3566 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3567 k = 0; 3568 for (i=0; i<am; i++) { 3569 ncols_o = bi[i+1] - bi[i]; 3570 ncols_d = ai[i+1] - ai[i]; 3571 /* off-diagonal portion of A */ 3572 for (jo=0; jo<ncols_o; jo++) { 3573 col = cmap[*bj]; 3574 if (col >= cstart) break; 3575 cj[k] = col; bj++; 3576 ca[k++] = *ba++; 3577 } 3578 /* diagonal portion of A */ 3579 for (j=0; j<ncols_d; j++) { 3580 cj[k] = cstart + *aj++; 3581 ca[k++] = *aa++; 3582 } 3583 /* off-diagonal portion of A */ 3584 for (j=jo; j<ncols_o; j++) { 3585 cj[k] = cmap[*bj++]; 3586 ca[k++] = *ba++; 3587 } 3588 } 3589 /* put together the new matrix */ 3590 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3591 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3592 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3593 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3594 mat->free_a = PETSC_TRUE; 3595 mat->free_ij = PETSC_TRUE; 3596 mat->nonew = 0; 3597 } else if (scall == MAT_REUSE_MATRIX){ 3598 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3599 ci = mat->i; cj = mat->j; ca = mat->a; 3600 for (i=0; i<am; i++) { 3601 /* off-diagonal portion of A */ 3602 ncols_o = bi[i+1] - bi[i]; 3603 for (jo=0; jo<ncols_o; jo++) { 3604 col = cmap[*bj]; 3605 if (col >= cstart) break; 3606 *ca++ = *ba++; bj++; 3607 } 3608 /* diagonal portion of A */ 3609 ncols_d = ai[i+1] - ai[i]; 3610 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3611 /* off-diagonal portion of A */ 3612 for (j=jo; j<ncols_o; j++) { 3613 *ca++ = *ba++; bj++; 3614 } 3615 } 3616 } else { 3617 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3618 } 3619 3620 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3621 PetscFunctionReturn(0); 3622 } 3623 3624 static PetscEvent logkey_getlocalmatcondensed = 0; 3625 #undef __FUNCT__ 3626 #define __FUNCT__ "MatGetLocalMatCondensed" 3627 /*@C 3628 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3629 3630 Not Collective 3631 3632 Input Parameters: 3633 + A - the matrix 3634 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3635 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3636 3637 Output Parameter: 3638 . A_loc - the local sequential matrix generated 3639 3640 Level: developer 3641 3642 @*/ 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3644 { 3645 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3646 PetscErrorCode ierr; 3647 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3648 IS isrowa,iscola; 3649 Mat *aloc; 3650 3651 PetscFunctionBegin; 3652 if (!logkey_getlocalmatcondensed) { 3653 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3654 } 3655 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3656 if (!row){ 3657 start = A->rmap.rstart; end = A->rmap.rend; 3658 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3659 } else { 3660 isrowa = *row; 3661 } 3662 if (!col){ 3663 start = A->cmap.rstart; 3664 cmap = a->garray; 3665 nzA = a->A->cmap.n; 3666 nzB = a->B->cmap.n; 3667 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3668 ncols = 0; 3669 for (i=0; i<nzB; i++) { 3670 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3671 else break; 3672 } 3673 imark = i; 3674 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3675 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3676 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3677 ierr = PetscFree(idx);CHKERRQ(ierr); 3678 } else { 3679 iscola = *col; 3680 } 3681 if (scall != MAT_INITIAL_MATRIX){ 3682 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3683 aloc[0] = *A_loc; 3684 } 3685 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3686 *A_loc = aloc[0]; 3687 ierr = PetscFree(aloc);CHKERRQ(ierr); 3688 if (!row){ 3689 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3690 } 3691 if (!col){ 3692 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3693 } 3694 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3695 PetscFunctionReturn(0); 3696 } 3697 3698 static PetscEvent logkey_GetBrowsOfAcols = 0; 3699 #undef __FUNCT__ 3700 #define __FUNCT__ "MatGetBrowsOfAcols" 3701 /*@C 3702 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3703 3704 Collective on Mat 3705 3706 Input Parameters: 3707 + A,B - the matrices in mpiaij format 3708 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3709 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3710 3711 Output Parameter: 3712 + rowb, colb - index sets of rows and columns of B to extract 3713 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3714 - B_seq - the sequential matrix generated 3715 3716 Level: developer 3717 3718 @*/ 3719 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3720 { 3721 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3722 PetscErrorCode ierr; 3723 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3724 IS isrowb,iscolb; 3725 Mat *bseq; 3726 3727 PetscFunctionBegin; 3728 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3729 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3730 } 3731 if (!logkey_GetBrowsOfAcols) { 3732 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3733 } 3734 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3735 3736 if (scall == MAT_INITIAL_MATRIX){ 3737 start = A->cmap.rstart; 3738 cmap = a->garray; 3739 nzA = a->A->cmap.n; 3740 nzB = a->B->cmap.n; 3741 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3742 ncols = 0; 3743 for (i=0; i<nzB; i++) { /* row < local row index */ 3744 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3745 else break; 3746 } 3747 imark = i; 3748 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3749 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3750 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3751 ierr = PetscFree(idx);CHKERRQ(ierr); 3752 *brstart = imark; 3753 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3754 } else { 3755 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3756 isrowb = *rowb; iscolb = *colb; 3757 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3758 bseq[0] = *B_seq; 3759 } 3760 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3761 *B_seq = bseq[0]; 3762 ierr = PetscFree(bseq);CHKERRQ(ierr); 3763 if (!rowb){ 3764 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3765 } else { 3766 *rowb = isrowb; 3767 } 3768 if (!colb){ 3769 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3770 } else { 3771 *colb = iscolb; 3772 } 3773 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3774 PetscFunctionReturn(0); 3775 } 3776 3777 static PetscEvent logkey_GetBrowsOfAocols = 0; 3778 #undef __FUNCT__ 3779 #define __FUNCT__ "MatGetBrowsOfAoCols" 3780 /*@C 3781 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3782 of the OFF-DIAGONAL portion of local A 3783 3784 Collective on Mat 3785 3786 Input Parameters: 3787 + A,B - the matrices in mpiaij format 3788 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3789 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3790 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3791 3792 Output Parameter: 3793 + B_oth - the sequential matrix generated 3794 3795 Level: developer 3796 3797 @*/ 3798 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3799 { 3800 VecScatter_MPI_General *gen_to,*gen_from; 3801 PetscErrorCode ierr; 3802 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3803 Mat_SeqAIJ *b_oth; 3804 VecScatter ctx=a->Mvctx; 3805 MPI_Comm comm=ctx->comm; 3806 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3807 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3808 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3809 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3810 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 3811 MPI_Status *sstatus,rstatus; 3812 PetscInt *cols; 3813 PetscScalar *vals; 3814 PetscMPIInt j; 3815 3816 PetscFunctionBegin; 3817 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3818 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3819 } 3820 if (!logkey_GetBrowsOfAocols) { 3821 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3822 } 3823 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3824 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3825 3826 gen_to = (VecScatter_MPI_General*)ctx->todata; 3827 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3828 rvalues = gen_from->values; /* holds the length of sending row */ 3829 svalues = gen_to->values; /* holds the length of receiving row */ 3830 nrecvs = gen_from->n; 3831 nsends = gen_to->n; 3832 3833 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3834 srow = gen_to->indices; /* local row index to be sent */ 3835 rstarts = gen_from->starts; 3836 sstarts = gen_to->starts; 3837 rprocs = gen_from->procs; 3838 sprocs = gen_to->procs; 3839 sstatus = gen_to->sstatus; 3840 3841 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3842 if (scall == MAT_INITIAL_MATRIX){ 3843 /* i-array */ 3844 /*---------*/ 3845 /* post receives */ 3846 for (i=0; i<nrecvs; i++){ 3847 rowlen = (PetscInt*)rvalues + rstarts[i]; 3848 nrows = rstarts[i+1]-rstarts[i]; 3849 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3850 } 3851 3852 /* pack the outgoing message */ 3853 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3854 rstartsj = sstartsj + nsends +1; 3855 sstartsj[0] = 0; rstartsj[0] = 0; 3856 len = 0; /* total length of j or a array to be sent */ 3857 k = 0; 3858 for (i=0; i<nsends; i++){ 3859 rowlen = (PetscInt*)svalues + sstarts[i]; 3860 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3861 for (j=0; j<nrows; j++) { 3862 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3863 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3864 len += rowlen[j]; 3865 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3866 k++; 3867 } 3868 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3869 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3870 } 3871 /* recvs and sends of i-array are completed */ 3872 i = nrecvs; 3873 while (i--) { 3874 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3875 } 3876 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3877 /* allocate buffers for sending j and a arrays */ 3878 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3879 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3880 3881 /* create i-array of B_oth */ 3882 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3883 b_othi[0] = 0; 3884 len = 0; /* total length of j or a array to be received */ 3885 k = 0; 3886 for (i=0; i<nrecvs; i++){ 3887 rowlen = (PetscInt*)rvalues + rstarts[i]; 3888 nrows = rstarts[i+1]-rstarts[i]; 3889 for (j=0; j<nrows; j++) { 3890 b_othi[k+1] = b_othi[k] + rowlen[j]; 3891 len += rowlen[j]; k++; 3892 } 3893 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3894 } 3895 3896 /* allocate space for j and a arrrays of B_oth */ 3897 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3898 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3899 3900 /* j-array */ 3901 /*---------*/ 3902 /* post receives of j-array */ 3903 for (i=0; i<nrecvs; i++){ 3904 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3905 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3906 } 3907 k = 0; 3908 for (i=0; i<nsends; i++){ 3909 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3910 bufJ = bufj+sstartsj[i]; 3911 for (j=0; j<nrows; j++) { 3912 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3913 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3914 for (l=0; l<ncols; l++){ 3915 *bufJ++ = cols[l]; 3916 } 3917 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3918 } 3919 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3920 } 3921 3922 /* recvs and sends of j-array are completed */ 3923 i = nrecvs; 3924 while (i--) { 3925 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3926 } 3927 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3928 } else if (scall == MAT_REUSE_MATRIX){ 3929 sstartsj = *startsj; 3930 rstartsj = sstartsj + nsends +1; 3931 bufa = *bufa_ptr; 3932 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3933 b_otha = b_oth->a; 3934 } else { 3935 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3936 } 3937 3938 /* a-array */ 3939 /*---------*/ 3940 /* post receives of a-array */ 3941 for (i=0; i<nrecvs; i++){ 3942 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3943 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3944 } 3945 k = 0; 3946 for (i=0; i<nsends; i++){ 3947 nrows = sstarts[i+1]-sstarts[i]; 3948 bufA = bufa+sstartsj[i]; 3949 for (j=0; j<nrows; j++) { 3950 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3951 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3952 for (l=0; l<ncols; l++){ 3953 *bufA++ = vals[l]; 3954 } 3955 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3956 3957 } 3958 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3959 } 3960 /* recvs and sends of a-array are completed */ 3961 i = nrecvs; 3962 while (i--) { 3963 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3964 } 3965 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3966 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3967 3968 if (scall == MAT_INITIAL_MATRIX){ 3969 /* put together the new matrix */ 3970 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3971 3972 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3973 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3974 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3975 b_oth->free_a = PETSC_TRUE; 3976 b_oth->free_ij = PETSC_TRUE; 3977 b_oth->nonew = 0; 3978 3979 ierr = PetscFree(bufj);CHKERRQ(ierr); 3980 if (!startsj || !bufa_ptr){ 3981 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3982 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3983 } else { 3984 *startsj = sstartsj; 3985 *bufa_ptr = bufa; 3986 } 3987 } 3988 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3989 3990 PetscFunctionReturn(0); 3991 } 3992 3993 #undef __FUNCT__ 3994 #define __FUNCT__ "MatGetCommunicationStructs" 3995 /*@C 3996 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3997 3998 Not Collective 3999 4000 Input Parameters: 4001 . A - The matrix in mpiaij format 4002 4003 Output Parameter: 4004 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4005 . colmap - A map from global column index to local index into lvec 4006 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4007 4008 Level: developer 4009 4010 @*/ 4011 #if defined (PETSC_USE_CTABLE) 4012 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4013 #else 4014 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4015 #endif 4016 { 4017 Mat_MPIAIJ *a; 4018 4019 PetscFunctionBegin; 4020 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4021 PetscValidPointer(lvec, 2) 4022 PetscValidPointer(colmap, 3) 4023 PetscValidPointer(multScatter, 4) 4024 a = (Mat_MPIAIJ *) A->data; 4025 if (lvec) *lvec = a->lvec; 4026 if (colmap) *colmap = a->colmap; 4027 if (multScatter) *multScatter = a->Mvctx; 4028 PetscFunctionReturn(0); 4029 } 4030 4031 EXTERN_C_BEGIN 4032 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4033 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4034 EXTERN_C_END 4035 4036 /*MC 4037 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4038 4039 Options Database Keys: 4040 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4041 4042 Level: beginner 4043 4044 .seealso: MatCreateMPIAIJ 4045 M*/ 4046 4047 EXTERN_C_BEGIN 4048 #undef __FUNCT__ 4049 #define __FUNCT__ "MatCreate_MPIAIJ" 4050 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4051 { 4052 Mat_MPIAIJ *b; 4053 PetscErrorCode ierr; 4054 PetscMPIInt size; 4055 4056 PetscFunctionBegin; 4057 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4058 4059 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4060 B->data = (void*)b; 4061 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4062 B->factor = 0; 4063 B->rmap.bs = 1; 4064 B->assembled = PETSC_FALSE; 4065 B->mapping = 0; 4066 4067 B->insertmode = NOT_SET_VALUES; 4068 b->size = size; 4069 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4070 4071 /* build cache for off array entries formed */ 4072 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4073 b->donotstash = PETSC_FALSE; 4074 b->colmap = 0; 4075 b->garray = 0; 4076 b->roworiented = PETSC_TRUE; 4077 4078 /* stuff used for matrix vector multiply */ 4079 b->lvec = PETSC_NULL; 4080 b->Mvctx = PETSC_NULL; 4081 4082 /* stuff for MatGetRow() */ 4083 b->rowindices = 0; 4084 b->rowvalues = 0; 4085 b->getrowactive = PETSC_FALSE; 4086 4087 4088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4089 "MatStoreValues_MPIAIJ", 4090 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4092 "MatRetrieveValues_MPIAIJ", 4093 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4095 "MatGetDiagonalBlock_MPIAIJ", 4096 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4098 "MatIsTranspose_MPIAIJ", 4099 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4101 "MatMPIAIJSetPreallocation_MPIAIJ", 4102 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4104 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4105 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4107 "MatDiagonalScaleLocal_MPIAIJ", 4108 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4110 "MatConvert_MPIAIJ_MPICSRPERM", 4111 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4113 "MatConvert_MPIAIJ_MPICRL", 4114 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4115 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4116 PetscFunctionReturn(0); 4117 } 4118 EXTERN_C_END 4119 4120 /* 4121 Special version for direct calls from Fortran 4122 */ 4123 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4124 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4125 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4126 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4127 #endif 4128 4129 /* Change these macros so can be used in void function */ 4130 #undef CHKERRQ 4131 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4132 #undef SETERRQ2 4133 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4134 #undef SETERRQ 4135 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4136 4137 EXTERN_C_BEGIN 4138 #undef __FUNCT__ 4139 #define __FUNCT__ "matsetvaluesmpiaij_" 4140 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4141 { 4142 Mat mat = *mmat; 4143 PetscInt m = *mm, n = *mn; 4144 InsertMode addv = *maddv; 4145 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4146 PetscScalar value; 4147 PetscErrorCode ierr; 4148 4149 MatPreallocated(mat); 4150 if (mat->insertmode == NOT_SET_VALUES) { 4151 mat->insertmode = addv; 4152 } 4153 #if defined(PETSC_USE_DEBUG) 4154 else if (mat->insertmode != addv) { 4155 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4156 } 4157 #endif 4158 { 4159 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4160 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4161 PetscTruth roworiented = aij->roworiented; 4162 4163 /* Some Variables required in the macro */ 4164 Mat A = aij->A; 4165 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4166 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4167 PetscScalar *aa = a->a; 4168 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4169 Mat B = aij->B; 4170 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4171 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4172 PetscScalar *ba = b->a; 4173 4174 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4175 PetscInt nonew = a->nonew; 4176 PetscScalar *ap1,*ap2; 4177 4178 PetscFunctionBegin; 4179 for (i=0; i<m; i++) { 4180 if (im[i] < 0) continue; 4181 #if defined(PETSC_USE_DEBUG) 4182 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4183 #endif 4184 if (im[i] >= rstart && im[i] < rend) { 4185 row = im[i] - rstart; 4186 lastcol1 = -1; 4187 rp1 = aj + ai[row]; 4188 ap1 = aa + ai[row]; 4189 rmax1 = aimax[row]; 4190 nrow1 = ailen[row]; 4191 low1 = 0; 4192 high1 = nrow1; 4193 lastcol2 = -1; 4194 rp2 = bj + bi[row]; 4195 ap2 = ba + bi[row]; 4196 rmax2 = bimax[row]; 4197 nrow2 = bilen[row]; 4198 low2 = 0; 4199 high2 = nrow2; 4200 4201 for (j=0; j<n; j++) { 4202 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4203 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4204 if (in[j] >= cstart && in[j] < cend){ 4205 col = in[j] - cstart; 4206 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4207 } else if (in[j] < 0) continue; 4208 #if defined(PETSC_USE_DEBUG) 4209 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4210 #endif 4211 else { 4212 if (mat->was_assembled) { 4213 if (!aij->colmap) { 4214 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4215 } 4216 #if defined (PETSC_USE_CTABLE) 4217 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4218 col--; 4219 #else 4220 col = aij->colmap[in[j]] - 1; 4221 #endif 4222 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4223 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4224 col = in[j]; 4225 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4226 B = aij->B; 4227 b = (Mat_SeqAIJ*)B->data; 4228 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4229 rp2 = bj + bi[row]; 4230 ap2 = ba + bi[row]; 4231 rmax2 = bimax[row]; 4232 nrow2 = bilen[row]; 4233 low2 = 0; 4234 high2 = nrow2; 4235 bm = aij->B->rmap.n; 4236 ba = b->a; 4237 } 4238 } else col = in[j]; 4239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4240 } 4241 } 4242 } else { 4243 if (!aij->donotstash) { 4244 if (roworiented) { 4245 if (ignorezeroentries && v[i*n] == 0.0) continue; 4246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4247 } else { 4248 if (ignorezeroentries && v[i] == 0.0) continue; 4249 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4250 } 4251 } 4252 } 4253 }} 4254 PetscFunctionReturnVoid(); 4255 } 4256 EXTERN_C_END 4257