#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
   at a slightly higher hash table cost; without it it is not scalable
   (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value;  \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
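/*
   A minimal standalone sketch of the insertion kernel implemented by the two
   macros above (illustrative only; the names below are hypothetical). Each
   macro binary-searches the sorted row for the column, finishes with a short
   linear scan, and either updates the entry in place or shifts the tail of
   the row up by one slot to insert. The real macros additionally handle
   reallocation via MatSeqXAIJReallocateAIJ(), the nonew modes, and the
   optional filtering of zero entries:

      static void RowInsert(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                            PetscInt col,PetscScalar value,InsertMode addv)
      {
        PetscInt lo = 0,hi = *nrow,t,i;
        while (hi - lo > 5) {                     binary search to a short run
          t = (lo + hi)/2;
          if (cols[t] > col) hi = t; else lo = t;
        }
        for (i=lo; i<hi; i++) {                   finish with a linear scan
          if (cols[i] == col) {
            if (addv == ADD_VALUES) vals[i] += value;
            else                    vals[i]  = value;
            return;
          }
          if (cols[i] > col) break;
        }
        for (t=*nrow; t>i; t--) {                 shift the tail up and insert
          cols[t] = cols[t-1]; vals[t] = vals[t-1];
        }
        cols[i] = col; vals[i] = value; (*nrow)++;
      }
*/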
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 note ba must be refreshed before ap2 since B was just recreated */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
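/*
   A minimal usage sketch for the routine above (illustrative only): values
   are addressed by global indices; entries destined for rows owned by other
   processes are placed in the stash and communicated during assembly.

      PetscInt    row = 3,cols[2] = {3,7};
      PetscScalar vals[2] = {4.0,-1.0};
      ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/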
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
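/*
   A minimal usage sketch for the routine above (illustrative only): only
   locally owned rows may be queried, so callers typically bracket the
   request with the ownership range.

      PetscInt    rstart,rend,row,col = 0;
      PetscScalar val;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      row  = rstart;
      ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/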
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
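/*
   Note on the Allreduce in MatAssemblyBegin_MPIAIJ() above: it relies on the
   InsertMode encoding (assuming the usual petscsys.h values INSERT_VALUES = 1
   and ADD_VALUES = 2). A bitwise OR across all processes then equals
   (ADD_VALUES|INSERT_VALUES) = 3 exactly when some processes inserted while
   others added, and otherwise reproduces the common mode unchanged.
*/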
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
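/*
   Both multiply routines above overlap communication with computation.
   With x split into owned and ghost parts, y = A_d x_owned + A_o x_ghost,
   where A_d (a->A) is the diagonal block and A_o (a->B) acts on the ghost
   values gathered into a->lvec. Schematically:

      VecScatterBegin(x -> lvec)     start gathering ghost values
      y  = A_d x                     local product hides the communication
      VecScatterEnd(x -> lvec)       ghost values now available
      y += A_o lvec                  add the off-diagonal contribution
*/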
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
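/*
   The transpose products above run the same scatter in reverse: the local
   block contributes z = A_d^T x directly, while A_o^T x, which belongs to
   rows owned by other processes, is accumulated there by scattering a->lvec
   back with ADD_VALUES and SCATTER_REVERSE.
*/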
__FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij);CHKERRQ(ierr); 727 728 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
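/*
   The binary format produced above is, in order (PetscInt unless noted):

      header[4]       MAT_FILE_COOKIE, global rows M, global columns N, total nonzeros
      row lengths     one per global row
      column indices  concatenated row by row, in global numbering
      values          PetscScalar, in the same order as the column indices

   Process 0 interleaves its own data with pieces received from the other
   processes so that rows appear in global order in the file.
*/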
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
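/*
   Each outer iteration above is a block Jacobi step with SOR applied to the
   local diagonal block: with the current iterate's ghost values gathered
   into mat->lvec, the right-hand side is first corrected for the
   off-diagonal coupling,

      bb1 = bb - A_o x_ghost,

   and the local sweep then relaxes A_d x_owned = bb1 with the requested
   forward, backward, or symmetric ordering.
*/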
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
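/*
   A minimal usage sketch for the routine above (illustrative only): an
   application that generates every entry on its owning process can disable
   the stash, and hence all assembly communication, with

      ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/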
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
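/*
   A minimal usage sketch for the two routines above (illustrative only):
   each MatGetRow() must be matched by MatRestoreRow() before the next row
   is requested, and only locally owned rows may be accessed.

      PetscInt    rstart,rend,row,ncols,*cols;
      PetscScalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/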
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = A->rmap.rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
If need be, we can provide a more 1576 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1577 then copying the submatrices */ 1578 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1579 } else { 1580 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1581 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1588 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1589 { 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #include "petscblaslapack.h" 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatAXPY_MPIAIJ" 1600 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1601 { 1602 PetscErrorCode ierr; 1603 PetscInt i; 1604 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1605 PetscBLASInt bnz,one=1; 1606 Mat_SeqAIJ *x,*y; 1607 1608 PetscFunctionBegin; 1609 if (str == SAME_NONZERO_PATTERN) { 1610 PetscScalar alpha = a; 1611 x = (Mat_SeqAIJ *)xx->A->data; 1612 y = (Mat_SeqAIJ *)yy->A->data; 1613 bnz = (PetscBLASInt)x->nz; 1614 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1615 x = (Mat_SeqAIJ *)xx->B->data; 1616 y = (Mat_SeqAIJ *)yy->B->data; 1617 bnz = (PetscBLASInt)x->nz; 1618 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1619 } else if (str == SUBSET_NONZERO_PATTERN) { 1620 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1621 1622 x = (Mat_SeqAIJ *)xx->B->data; 1623 y = (Mat_SeqAIJ *)yy->B->data; 1624 if (y->xtoy && y->XtoY != xx->B) { 1625 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1626 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1627 } 1628 if (!y->xtoy) { /* get xtoy */ 1629 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1630 y->XtoY = xx->B; 1631 } 1632 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1633 } else { 1634 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1635 } 1636 PetscFunctionReturn(0); 1637 } 1638 1639 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatConjugate_MPIAIJ" 1643 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1644 { 1645 #if defined(PETSC_USE_COMPLEX) 1646 PetscErrorCode ierr; 1647 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1648 1649 PetscFunctionBegin; 1650 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1651 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1652 #else 1653 PetscFunctionBegin; 1654 #endif 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatRealPart_MPIAIJ" 1660 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1661 { 1662 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1663 PetscErrorCode ierr; 1664 1665 PetscFunctionBegin; 1666 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1667 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1673 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1674 { 1675 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1676 PetscErrorCode ierr; 1677 1678 PetscFunctionBegin; 1679 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1680 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #ifdef PETSC_HAVE_PBGL 1685 1686 #include <boost/parallel/mpi/bsp_process_group.hpp> 1687 #include 
<boost/graph/distributed/ilu_default_graph.hpp> 1688 #include <boost/graph/distributed/ilu_0_block.hpp> 1689 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1690 #include <boost/graph/distributed/petsc/interface.hpp> 1691 #include <boost/multi_array.hpp> 1692 #include <boost/parallel/distributed_property_map.hpp> 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1696 /* 1697 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1698 */ 1699 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1700 { 1701 namespace petsc = boost::distributed::petsc; 1702 1703 namespace graph_dist = boost::graph::distributed; 1704 using boost::graph::distributed::ilu_default::process_group_type; 1705 using boost::graph::ilu_permuted; 1706 1707 PetscTruth row_identity, col_identity; 1708 PetscContainer c; 1709 PetscInt m, n, M, N; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU"); 1714 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1715 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1716 if (!row_identity || !col_identity) { 1717 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1718 } 1719 1720 process_group_type pg; 1721 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1722 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1723 lgraph_type& level_graph = *lgraph_p; 1724 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1725 1726 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1727 ilu_permuted(level_graph); 1728 1729 /* put together the new matrix */ 1730 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1731 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1732 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1733 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1734 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1735 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1736 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1737 (*fact)->factor = FACTOR_LU; 1738 1739 ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr); 1740 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1741 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1747 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1748 { 1749 PetscFunctionBegin; 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatSolve_MPIAIJ" 1755 /* 1756 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1757 */ 1758 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1759 { 1760 namespace graph_dist = boost::graph::distributed; 1761 1762 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1763 lgraph_type* lgraph_p; 1764 PetscContainer c; 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1769 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1770 ierr = VecCopy(b, x);CHKERRQ(ierr); 1771 1772 PetscScalar* array_x; 1773 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1774 PetscInt sx; 1775 ierr = VecGetSize(x,
&sx);CHKERRQ(ierr); 1776 1777 PetscScalar* array_b; 1778 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1779 PetscInt sb; 1780 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1781 1782 lgraph_type& level_graph = *lgraph_p; 1783 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1784 1785 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1786 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1787 ref_x(array_x, boost::extents[num_vertices(graph)]); 1788 1789 typedef boost::iterator_property_map<array_ref_type::iterator, 1790 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1791 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1792 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1793 1794 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1795 ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 #endif 1799 1800 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1801 PetscInt nzlocal,nsends,nrecvs; 1802 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1803 PetscScalar *sbuf_a,**rbuf_a; 1804 PetscErrorCode (*MatDestroy)(Mat); 1805 } Mat_Redundant; 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1809 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1810 { 1811 PetscErrorCode ierr; 1812 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1813 PetscInt i; 1814 1815 PetscFunctionBegin; 1816 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1817 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1818 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1819 for (i=0; i<redund->nrecvs; i++){ 1820 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1821 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1822 } 1823 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1824 ierr = PetscFree(redund);CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatDestroy_MatRedundant" 1830 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1831 { 1832 PetscErrorCode ierr; 1833 PetscContainer container; 1834 Mat_Redundant *redund=PETSC_NULL; 1835 1836 PetscFunctionBegin; 1837 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1838 if (container) { 1839 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1840 } else { 1841 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 1842 } 1843 A->ops->destroy = redund->MatDestroy; 1844 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1845 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1846 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1852 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1853 { 1854 PetscMPIInt rank,size; 1855 MPI_Comm comm=mat->comm; 1856 PetscErrorCode ierr; 1857 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1858 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1859 PetscInt *rowrange=mat->rmap.range; 1860 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1861 Mat A=aij->A,B=aij->B,C=*matredundant; 1862 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1863 PetscScalar *sbuf_a; 1864 PetscInt nzlocal=a->nz+b->nz; 1865 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1866
PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1867 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1868 PetscScalar *vals,*aworkA,*aworkB; 1869 PetscMPIInt tag1,tag2,tag3,imdex; 1870 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1871 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1872 MPI_Status recv_status,*send_status; 1873 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1874 PetscInt **rbuf_j=PETSC_NULL; 1875 PetscScalar **rbuf_a=PETSC_NULL; 1876 Mat_Redundant *redund=PETSC_NULL; 1877 PetscContainer container; 1878 1879 PetscFunctionBegin; 1880 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1881 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1882 1883 if (reuse == MAT_REUSE_MATRIX) { 1884 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1885 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1886 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1887 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1888 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1889 if (container) { 1890 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1891 } else { 1892 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 1893 } 1894 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 1895 1896 nsends = redund->nsends; 1897 nrecvs = redund->nrecvs; 1898 send_rank = redund->send_rank; recv_rank = send_rank + size; 1899 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1900 sbuf_j = redund->sbuf_j; 1901 sbuf_a = redund->sbuf_a; 1902 rbuf_j = redund->rbuf_j; 1903 rbuf_a = redund->rbuf_a; 1904 } 1905 1906 if (reuse == MAT_INITIAL_MATRIX){ 1907 PetscMPIInt subrank,subsize; 1908 PetscInt nleftover,np_subcomm; 1909 /* get the destination processors' id send_rank, nsends and nrecvs */ 1910 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1911 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1912 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr); 1913 recv_rank = send_rank + size; 1914 np_subcomm = size/nsubcomm; 1915 nleftover = size - nsubcomm*np_subcomm; 1916 nsends = 0; nrecvs = 0; 1917 for (i=0; i<size; i++){ /* i = rank */ 1918 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1919 send_rank[nsends] = i; nsends++; 1920 recv_rank[nrecvs++] = i; 1921 } 1922 } 1923 if (rank >= size - nleftover){ /* this proc is a leftover processor */ 1924 i = size-nleftover-1; 1925 j = 0; 1926 while (j < nsubcomm - nleftover){ 1927 send_rank[nsends++] = i; 1928 i--; j++; 1929 } 1930 } 1931 1932 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1933 for (i=0; i<nleftover; i++){ 1934 recv_rank[nrecvs++] = size-nleftover+i; 1935 } 1936 } 1937 1938 /* allocate sbuf_j, sbuf_a */ 1939 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1940 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1941 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1942 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 1943 1944 /* copy mat's local entries into the buffers */ 1945 if (reuse == MAT_INITIAL_MATRIX){ 1946 rownz_max = 0; 1947 rptr = sbuf_j; 1948 cols = sbuf_j + rend-rstart + 1; 1949 vals = sbuf_a; 1950 rptr[0] = 0; 1951 for (i=0; i<rend-rstart; i++){ 1952 row = i + rstart; 1953 nzA = a->i[i+1] - a->i[i]; nzB =
b->i[i+1] - b->i[i]; 1954 ncols = nzA + nzB; 1955 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1956 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1957 /* load the column indices for this row into cols */ 1958 lwrite = 0; 1959 for (l=0; l<nzB; l++) { 1960 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1961 vals[lwrite] = aworkB[l]; 1962 cols[lwrite++] = ctmp; 1963 } 1964 } 1965 for (l=0; l<nzA; l++){ 1966 vals[lwrite] = aworkA[l]; 1967 cols[lwrite++] = cstart + cworkA[l]; 1968 } 1969 for (l=0; l<nzB; l++) { 1970 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1971 vals[lwrite] = aworkB[l]; 1972 cols[lwrite++] = ctmp; 1973 } 1974 } 1975 vals += ncols; 1976 cols += ncols; 1977 rptr[i+1] = rptr[i] + ncols; 1978 if (rownz_max < ncols) rownz_max = ncols; 1979 } 1980 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz); 1981 } else { /* only copy matrix values into sbuf_a */ 1982 rptr = sbuf_j; 1983 vals = sbuf_a; 1984 rptr[0] = 0; 1985 for (i=0; i<rend-rstart; i++){ 1986 row = i + rstart; 1987 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1988 ncols = nzA + nzB; 1989 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1990 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1991 lwrite = 0; 1992 for (l=0; l<nzB; l++) { 1993 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1994 } 1995 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1996 for (l=0; l<nzB; l++) { 1997 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1998 } 1999 vals += ncols; 2000 rptr[i+1] = rptr[i] + ncols; 2001 } 2002 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2003 2004 /* send nzlocal to others, and recv others' nzlocal */ 2005 /*--------------------------------------------------*/ 2006 if (reuse == MAT_INITIAL_MATRIX){ 2007 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2008 s_waits2 = s_waits3 + nsends; 2009 s_waits1 = s_waits2 + nsends; 2010 r_waits1 = s_waits1 + nsends; 2011 r_waits2 = r_waits1 + nrecvs; 2012 r_waits3 = r_waits2 + nrecvs; 2013 } else { 2014 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2015 r_waits3 = s_waits3 + nsends; 2016 } 2017 2018 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2019 if (reuse == MAT_INITIAL_MATRIX){ 2020 /* get new tags to keep the communication clean */ 2021 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2022 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2023 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2024 rbuf_nz = sbuf_nz + nsends; 2025 2026 /* post receives of others' nzlocal */ 2027 for (i=0; i<nrecvs; i++){ 2028 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2029 } 2030 /* send nzlocal to others */ 2031 for (i=0; i<nsends; i++){ 2032 sbuf_nz[i] = nzlocal; 2033 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2034 } 2035 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2036 count = nrecvs; 2037 while (count) { 2038 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2039 recv_rank[imdex] = recv_status.MPI_SOURCE; 2040 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2041 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2042 2043 i =
rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2044 rbuf_nz[imdex] += i + 2; 2045 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2046 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2047 count--; 2048 } 2049 /* wait on sends of nzlocal */ 2050 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2051 /* send mat->i,j to others, and recv from others */ 2052 /*------------------------------------------------*/ 2053 for (i=0; i<nsends; i++){ 2054 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2055 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2056 } 2057 /* wait on receives of mat->i,j */ 2058 /*------------------------------*/ 2059 count = nrecvs; 2060 while (count) { 2061 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2062 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2063 count--; 2064 } 2065 /* wait on sends of mat->i,j */ 2066 /*---------------------------*/ 2067 if (nsends) { 2068 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2069 } 2070 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2071 2072 /* post receives, send and receive mat->a */ 2073 /*----------------------------------------*/ 2074 for (imdex=0; imdex<nrecvs; imdex++) { 2075 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2076 } 2077 for (i=0; i<nsends; i++){ 2078 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2079 } 2080 count = nrecvs; 2081 while (count) { 2082 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2083 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2084 count--; 2085 } 2086 if (nsends) { 2087 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2088 } 2089 2090 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2091 2092 /* create redundant matrix */ 2093 /*-------------------------*/ 2094 if (reuse == MAT_INITIAL_MATRIX){ 2095 /* compute rownz_max for preallocation */ 2096 for (imdex=0; imdex<nrecvs; imdex++){ 2097 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2098 rptr = rbuf_j[imdex]; 2099 for (i=0; i<j; i++){ 2100 ncols = rptr[i+1] - rptr[i]; 2101 if (rownz_max < ncols) rownz_max = ncols; 2102 } 2103 } 2104 2105 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2106 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2107 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2108 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2109 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2110 } else { 2111 C = *matredundant; 2112 } 2113 2114 /* insert local matrix entries */ 2115 rptr = sbuf_j; 2116 cols = sbuf_j + rend-rstart + 1; 2117 vals = sbuf_a; 2118 for (i=0; i<rend-rstart; i++){ 2119 row = i + rstart; 2120 ncols = rptr[i+1] - rptr[i]; 2121 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2122 vals += ncols; 2123 cols += ncols; 2124 } 2125 /* insert received matrix entries */ 2126 for (imdex=0; imdex<nrecvs; imdex++){ 2127 rstart = rowrange[recv_rank[imdex]]; 2128 rend =
rowrange[recv_rank[imdex]+1]; 2129 rptr = rbuf_j[imdex]; 2130 cols = rbuf_j[imdex] + rend-rstart + 1; 2131 vals = rbuf_a[imdex]; 2132 for (i=0; i<rend-rstart; i++){ 2133 row = i + rstart; 2134 ncols = rptr[i+1] - rptr[i]; 2135 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2136 vals += ncols; 2137 cols += ncols; 2138 } 2139 } 2140 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2141 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2143 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2144 if (reuse == MAT_INITIAL_MATRIX){ 2145 PetscContainer container; 2146 *matredundant = C; 2147 /* create a supporting struct and attach it to C for reuse */ 2148 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 2149 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2150 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2151 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2152 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2153 2154 redund->nzlocal = nzlocal; 2155 redund->nsends = nsends; 2156 redund->nrecvs = nrecvs; 2157 redund->send_rank = send_rank; 2158 redund->sbuf_nz = sbuf_nz; 2159 redund->sbuf_j = sbuf_j; 2160 redund->sbuf_a = sbuf_a; 2161 redund->rbuf_j = rbuf_j; 2162 redund->rbuf_a = rbuf_a; 2163 2164 redund->MatDestroy = C->ops->destroy; 2165 C->ops->destroy = MatDestroy_MatRedundant; 2166 } 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /* -------------------------------------------------------------------*/ 2171 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2172 MatGetRow_MPIAIJ, 2173 MatRestoreRow_MPIAIJ, 2174 MatMult_MPIAIJ, 2175 /* 4*/ MatMultAdd_MPIAIJ, 2176 MatMultTranspose_MPIAIJ, 2177 MatMultTransposeAdd_MPIAIJ, 2178 #ifdef PETSC_HAVE_PBGL 2179 MatSolve_MPIAIJ, 2180 #else 2181 0, 2182 #endif 2183 0, 2184 0, 2185 /*10*/ 0, 2186 0, 2187 0, 2188 MatRelax_MPIAIJ, 2189 MatTranspose_MPIAIJ, 2190 /*15*/ MatGetInfo_MPIAIJ, 2191 MatEqual_MPIAIJ, 2192 MatGetDiagonal_MPIAIJ, 2193 MatDiagonalScale_MPIAIJ, 2194 MatNorm_MPIAIJ, 2195 /*20*/ MatAssemblyBegin_MPIAIJ, 2196 MatAssemblyEnd_MPIAIJ, 2197 0, 2198 MatSetOption_MPIAIJ, 2199 MatZeroEntries_MPIAIJ, 2200 /*25*/ MatZeroRows_MPIAIJ, 2201 0, 2202 #ifdef PETSC_HAVE_PBGL 2203 MatLUFactorNumeric_MPIAIJ, 2204 #else 2205 0, 2206 #endif 2207 0, 2208 0, 2209 /*30*/ MatSetUpPreallocation_MPIAIJ, 2210 #ifdef PETSC_HAVE_PBGL 2211 MatILUFactorSymbolic_MPIAIJ, 2212 #else 2213 0, 2214 #endif 2215 0, 2216 0, 2217 0, 2218 /*35*/ MatDuplicate_MPIAIJ, 2219 0, 2220 0, 2221 0, 2222 0, 2223 /*40*/ MatAXPY_MPIAIJ, 2224 MatGetSubMatrices_MPIAIJ, 2225 MatIncreaseOverlap_MPIAIJ, 2226 MatGetValues_MPIAIJ, 2227 MatCopy_MPIAIJ, 2228 /*45*/ 0, 2229 MatScale_MPIAIJ, 2230 0, 2231 0, 2232 0, 2233 /*50*/ MatSetBlockSize_MPIAIJ, 2234 0, 2235 0, 2236 0, 2237 0, 2238 /*55*/ MatFDColoringCreate_MPIAIJ, 2239 0, 2240 MatSetUnfactored_MPIAIJ, 2241 MatPermute_MPIAIJ, 2242 0, 2243 /*60*/ MatGetSubMatrix_MPIAIJ, 2244 MatDestroy_MPIAIJ, 2245 MatView_MPIAIJ, 2246 0, 2247 0, 2248 /*65*/ 0, 2249 0, 2250 0, 2251 0, 2252 0, 2253 /*70*/ 0, 2254 0, 2255 MatSetColoring_MPIAIJ, 2256 #if defined(PETSC_HAVE_ADIC) 2257 MatSetValuesAdic_MPIAIJ, 2258 #else 2259 0, 2260 #endif 2261 MatSetValuesAdifor_MPIAIJ, 2262 /*75*/ 0, 2263 0, 2264 0, 2265 0, 2266 0, 2267 /*80*/ 0, 2268 0, 
2269 0, 2270 0, 2271 /*84*/ MatLoad_MPIAIJ, 2272 0, 2273 0, 2274 0, 2275 0, 2276 0, 2277 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2278 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2279 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2280 MatPtAP_Basic, 2281 MatPtAPSymbolic_MPIAIJ, 2282 /*95*/ MatPtAPNumeric_MPIAIJ, 2283 0, 2284 0, 2285 0, 2286 0, 2287 /*100*/0, 2288 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2289 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2290 MatConjugate_MPIAIJ, 2291 0, 2292 /*105*/MatSetValuesRow_MPIAIJ, 2293 MatRealPart_MPIAIJ, 2294 MatImaginaryPart_MPIAIJ, 2295 0, 2296 0, 2297 /*110*/0, 2298 MatGetRedundantMatrix_MPIAIJ}; 2299 2300 /* ----------------------------------------------------------------------------------------*/ 2301 2302 EXTERN_C_BEGIN 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2305 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2306 { 2307 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2312 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 EXTERN_C_END 2316 2317 EXTERN_C_BEGIN 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2320 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2321 { 2322 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2323 PetscErrorCode ierr; 2324 2325 PetscFunctionBegin; 2326 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2327 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2328 PetscFunctionReturn(0); 2329 } 2330 EXTERN_C_END 2331 2332 #include "petscpc.h" 2333 EXTERN_C_BEGIN 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2336 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2337 { 2338 Mat_MPIAIJ *b; 2339 PetscErrorCode ierr; 2340 PetscInt i; 2341 2342 PetscFunctionBegin; 2343 B->preallocated = PETSC_TRUE; 2344 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2345 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2346 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2347 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2348 2349 B->rmap.bs = B->cmap.bs = 1; 2350 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2351 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2352 if (d_nnz) { 2353 for (i=0; i<B->rmap.n; i++) { 2354 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2355 } 2356 } 2357 if (o_nnz) { 2358 for (i=0; i<B->rmap.n; i++) { 2359 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2360 } 2361 } 2362 b = (Mat_MPIAIJ*)B->data; 2363 2364 /* Explicitly create 2 MATSEQAIJ matrices. 
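The diagonal block b->A is sized by the local row and column counts, while the off-diagonal block b->B is created with the full global number of columns; its columns are compressed out during assembly.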
*/ 2365 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2366 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2367 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2368 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2369 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2370 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2371 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2372 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2373 2374 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2375 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2376 2377 PetscFunctionReturn(0); 2378 } 2379 EXTERN_C_END 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2383 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2384 { 2385 Mat mat; 2386 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 *newmat = 0; 2391 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2392 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2393 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2394 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2395 a = (Mat_MPIAIJ*)mat->data; 2396 2397 mat->factor = matin->factor; 2398 mat->rmap.bs = matin->rmap.bs; 2399 mat->assembled = PETSC_TRUE; 2400 mat->insertmode = NOT_SET_VALUES; 2401 mat->preallocated = PETSC_TRUE; 2402 2403 a->size = oldmat->size; 2404 a->rank = oldmat->rank; 2405 a->donotstash = oldmat->donotstash; 2406 a->roworiented = oldmat->roworiented; 2407 a->rowindices = 0; 2408 a->rowvalues = 0; 2409 a->getrowactive = PETSC_FALSE; 2410 2411 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2412 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2413 2414 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2415 if (oldmat->colmap) { 2416 #if defined (PETSC_USE_CTABLE) 2417 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2418 #else 2419 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2420 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2421 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2422 #endif 2423 } else a->colmap = 0; 2424 if (oldmat->garray) { 2425 PetscInt len; 2426 len = oldmat->B->cmap.n; 2427 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2428 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2429 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2430 } else a->garray = 0; 2431 2432 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2433 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2434 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2435 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2436 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2437 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2438 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2439 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2440 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2441 *newmat = mat; 2442 PetscFunctionReturn(0); 2443 } 2444 2445 #include "petscsys.h" 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatLoad_MPIAIJ" 2449 
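/*
   Loads a matrix stored in PETSc binary format: process 0 reads the header, the
   row lengths, the column indices, and the numerical values, and ships each piece
   to the process that owns the corresponding rows; the rows are distributed as
   M/size per process, with one extra row on each of the first M % size processes.
*/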
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2450 { 2451 Mat A; 2452 PetscScalar *vals,*svals; 2453 MPI_Comm comm = ((PetscObject)viewer)->comm; 2454 MPI_Status status; 2455 PetscErrorCode ierr; 2456 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2457 PetscInt i,nz,j,rstart,rend,mmax; 2458 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2459 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2460 PetscInt cend,cstart,n,*rowners; 2461 int fd; 2462 2463 PetscFunctionBegin; 2464 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2465 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2466 if (!rank) { 2467 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2468 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2469 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 2470 } 2471 2472 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2473 M = header[1]; N = header[2]; 2474 /* determine ownership of all rows */ 2475 m = M/size + ((M % size) > rank); 2476 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2477 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2478 2479 /* First process needs enough room for the process with the most rows */ 2480 if (!rank) { 2481 mmax = rowners[1]; 2482 for (i=2; i<=size; i++) { 2483 mmax = PetscMax(mmax,rowners[i]); 2484 } 2485 } else mmax = m; 2486 2487 rowners[0] = 0; 2488 for (i=2; i<=size; i++) { 2489 rowners[i] += rowners[i-1]; 2490 } 2491 rstart = rowners[rank]; 2492 rend = rowners[rank+1]; 2493 2494 /* distribute row lengths to all processors */ 2495 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2496 if (!rank) { 2497 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2498 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2499 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2500 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2501 for (j=0; j<m; j++) { 2502 procsnz[0] += ourlens[j]; 2503 } 2504 for (i=1; i<size; i++) { 2505 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2506 /* calculate the number of nonzeros on each processor */ 2507 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2508 procsnz[i] += rowlengths[j]; 2509 } 2510 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2511 } 2512 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2513 } else { 2514 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2515 } 2516 2517 if (!rank) { 2518 /* determine max buffer needed and allocate it */ 2519 maxnz = 0; 2520 for (i=0; i<size; i++) { 2521 maxnz = PetscMax(maxnz,procsnz[i]); 2522 } 2523 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2524 2525 /* read in my part of the matrix column indices */ 2526 nz = procsnz[0]; 2527 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2528 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2529 2530 /* read in everyone else's and ship it off */ 2531 for (i=1; i<size; i++) { 2532 nz = procsnz[i]; 2533 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2534 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2535 } 2536 ierr = PetscFree(cols);CHKERRQ(ierr); 2537 } else { 2538 /* determine buffer space needed for message */ 2539 nz = 0; 2540 for (i=0; i<m; i++) { 2541 nz += ourlens[i]; 2542
} 2543 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2544 2545 /* receive message of column indices */ 2546 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2547 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2548 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2549 } 2550 2551 /* determine column ownership if matrix is not square */ 2552 if (N != M) { 2553 n = N/size + ((N % size) > rank); 2554 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2555 cstart = cend - n; 2556 } else { 2557 cstart = rstart; 2558 cend = rend; 2559 n = cend - cstart; 2560 } 2561 2562 /* loop over local rows, determining number of off diagonal entries */ 2563 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2564 jj = 0; 2565 for (i=0; i<m; i++) { 2566 for (j=0; j<ourlens[i]; j++) { 2567 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2568 jj++; 2569 } 2570 } 2571 2572 /* create our matrix */ 2573 for (i=0; i<m; i++) { 2574 ourlens[i] -= offlens[i]; 2575 } 2576 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2577 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2578 ierr = MatSetType(A,type);CHKERRQ(ierr); 2579 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2580 2581 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2582 for (i=0; i<m; i++) { 2583 ourlens[i] += offlens[i]; 2584 } 2585 2586 if (!rank) { 2587 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2588 2589 /* read in my part of the matrix numerical values */ 2590 nz = procsnz[0]; 2591 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2592 2593 /* insert into matrix */ 2594 jj = rstart; 2595 smycols = mycols; 2596 svals = vals; 2597 for (i=0; i<m; i++) { 2598 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2599 smycols += ourlens[i]; 2600 svals += ourlens[i]; 2601 jj++; 2602 } 2603 2604 /* read in the other processes' parts and ship them out */ 2605 for (i=1; i<size; i++) { 2606 nz = procsnz[i]; 2607 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2608 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2609 } 2610 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2611 } else { 2612 /* receive numeric values */ 2613 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2614 2615 /* receive message of values */ 2616 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2617 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2618 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2619 2620 /* insert into matrix */ 2621 jj = rstart; 2622 smycols = mycols; 2623 svals = vals; 2624 for (i=0; i<m; i++) { 2625 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2626 smycols += ourlens[i]; 2627 svals += ourlens[i]; 2628 jj++; 2629 } 2630 } 2631 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2632 ierr = PetscFree(vals);CHKERRQ(ierr); 2633 ierr = PetscFree(mycols);CHKERRQ(ierr); 2634 ierr = PetscFree(rowners);CHKERRQ(ierr); 2635 2636 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2637 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2638 *newmat = A; 2639 PetscFunctionReturn(0); 2640 } 2641 2642 #undef __FUNCT__ 2643 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2644 /* 2645 Not great since it makes two copies of the submatrix: first a SeqAIJ locally, 2646 and then concatenating the local matrices into
the end result. 2647 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2648 */ 2649 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2650 { 2651 PetscErrorCode ierr; 2652 PetscMPIInt rank,size; 2653 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2654 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2655 Mat *local,M,Mreuse; 2656 PetscScalar *vwork,*aa; 2657 MPI_Comm comm = mat->comm; 2658 Mat_SeqAIJ *aij; 2659 2660 2661 PetscFunctionBegin; 2662 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2663 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2664 2665 if (call == MAT_REUSE_MATRIX) { 2666 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2667 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2668 local = &Mreuse; 2669 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2670 } else { 2671 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2672 Mreuse = *local; 2673 ierr = PetscFree(local);CHKERRQ(ierr); 2674 } 2675 2676 /* 2677 m - number of local rows 2678 n - number of columns (same on all processors) 2679 rstart - first row in new global matrix generated 2680 */ 2681 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2682 if (call == MAT_INITIAL_MATRIX) { 2683 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2684 ii = aij->i; 2685 jj = aij->j; 2686 2687 /* 2688 Determine the number of non-zeros in the diagonal and off-diagonal 2689 portions of the matrix in order to do correct preallocation 2690 */ 2691 2692 /* first get start and end of "diagonal" columns */ 2693 if (csize == PETSC_DECIDE) { 2694 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2695 if (mglobal == n) { /* square matrix */ 2696 nlocal = m; 2697 } else { 2698 nlocal = n/size + ((n % size) > rank); 2699 } 2700 } else { 2701 nlocal = csize; 2702 } 2703 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2704 rstart = rend - nlocal; 2705 if (rank == size - 1 && rend != n) { 2706 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2707 } 2708 2709 /* next, compute all the lengths */ 2710 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2711 olens = dlens + m; 2712 for (i=0; i<m; i++) { 2713 jend = ii[i+1] - ii[i]; 2714 olen = 0; 2715 dlen = 0; 2716 for (j=0; j<jend; j++) { 2717 if (*jj < rstart || *jj >= rend) olen++; 2718 else dlen++; 2719 jj++; 2720 } 2721 olens[i] = olen; 2722 dlens[i] = dlen; 2723 } 2724 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2725 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2726 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2727 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2728 ierr = PetscFree(dlens);CHKERRQ(ierr); 2729 } else { 2730 PetscInt ml,nl; 2731 2732 M = *newmat; 2733 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2734 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2735 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2736 /* 2737 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2738 rather than the slower MatSetValues(). 
2739 */ 2740 M->was_assembled = PETSC_TRUE; 2741 M->assembled = PETSC_FALSE; 2742 } 2743 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2744 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2745 ii = aij->i; 2746 jj = aij->j; 2747 aa = aij->a; 2748 for (i=0; i<m; i++) { 2749 row = rstart + i; 2750 nz = ii[i+1] - ii[i]; 2751 cwork = jj; jj += nz; 2752 vwork = aa; aa += nz; 2753 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2754 } 2755 2756 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 *newmat = M; 2759 2760 /* save submatrix used in processor for next request */ 2761 if (call == MAT_INITIAL_MATRIX) { 2762 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2763 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2764 } 2765 2766 PetscFunctionReturn(0); 2767 } 2768 2769 EXTERN_C_BEGIN 2770 #undef __FUNCT__ 2771 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2772 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2773 { 2774 PetscInt m,cstart, cend,j,nnz,i,d; 2775 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2776 const PetscInt *JJ; 2777 PetscScalar *values; 2778 PetscErrorCode ierr; 2779 2780 PetscFunctionBegin; 2781 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2782 2783 B->rmap.bs = B->cmap.bs = 1; 2784 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2785 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2786 m = B->rmap.n; 2787 cstart = B->cmap.rstart; 2788 cend = B->cmap.rend; 2789 rstart = B->rmap.rstart; 2790 2791 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2792 o_nnz = d_nnz + m; 2793 2794 for (i=0; i<m; i++) { 2795 nnz = Ii[i+1]- Ii[i]; 2796 JJ = J + Ii[i]; 2797 nnz_max = PetscMax(nnz_max,nnz); 2798 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2799 for (j=0; j<nnz; j++) { 2800 if (*JJ >= cstart) break; 2801 JJ++; 2802 } 2803 d = 0; 2804 for (; j<nnz; j++) { 2805 if (*JJ++ >= cend) break; 2806 d++; 2807 } 2808 d_nnz[i] = d; 2809 o_nnz[i] = nnz - d; 2810 } 2811 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2812 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2813 2814 if (v) values = (PetscScalar*)v; 2815 else { 2816 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2817 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2818 } 2819 2820 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2821 for (i=0; i<m; i++) { 2822 ii = i + rstart; 2823 nnz = Ii[i+1]- Ii[i]; 2824 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2825 } 2826 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2827 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2828 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2829 2830 if (!v) { 2831 ierr = PetscFree(values);CHKERRQ(ierr); 2832 } 2833 PetscFunctionReturn(0); 2834 } 2835 EXTERN_C_END 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2839 /*@ 2840 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2841 (the default parallel PETSc format). 2842 2843 Collective on MPI_Comm 2844 2845 Input Parameters: 2846 + B - the matrix 2847 . 
i - the indices into j for the start of each local row (starts with zero) 2848 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2849 - v - optional values in the matrix 2850 2851 Level: developer 2852 2853 Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal 2854 storage format. Thus changing the values in v[] after this call will not affect the matrix values. 2855 2856 .keywords: matrix, aij, compressed row, sparse, parallel 2857 2858 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2859 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2860 @*/ 2861 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2862 { 2863 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2864 2865 PetscFunctionBegin; 2866 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2867 if (f) { 2868 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2869 } 2870 PetscFunctionReturn(0); 2871 } 2872 2873 #undef __FUNCT__ 2874 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2875 /*@C 2876 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2877 (the default parallel PETSc format). For good matrix assembly performance 2878 the user should preallocate the matrix storage by setting the parameters 2879 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2880 performance can be increased by more than a factor of 50. 2881 2882 Collective on MPI_Comm 2883 2884 Input Parameters: 2885 + A - the matrix 2886 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2887 (same value is used for all local rows) 2888 . d_nnz - array containing the number of nonzeros in the various rows of the 2889 DIAGONAL portion of the local submatrix (possibly different for each row) 2890 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2891 The size of this array is equal to the number of local rows, i.e., 'm'. 2892 You must leave room for the diagonal entry even if it is zero. 2893 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2894 submatrix (same value is used for all local rows). 2895 - o_nnz - array containing the number of nonzeros in the various rows of the 2896 OFF-DIAGONAL portion of the local submatrix (possibly different for 2897 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2898 structure. The size of this array is equal to the number 2899 of local rows, i.e., 'm'. 2900 2901 If the *_nnz parameter is given then the *_nz parameter is ignored. 2902 2903 The AIJ format (also called the Yale sparse matrix format or 2904 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2905 storage. The stored row and column indices begin with zero. See the users manual for details. 2906 2907 The parallel matrix is partitioned such that the first m0 rows belong to 2908 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2909 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
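For a process of a given rank this yields, in effect, the ownership range (cf. MatGetOwnershipRange()):

.vb
     rstart = m0 + m1 + ... + m(rank-1)     first global row owned by this process
     rend   = rstart + m                    one past the last global row owned here
.ve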
2910 2911 The DIAGONAL portion of the local submatrix of a processor can be defined 2912 as the submatrix which is obtained by extracting the part corresponding 2913 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2914 first row that belongs to the processor, and r2 is the last row belonging 2915 to this processor. This is a square mxm matrix. The remaining portion 2916 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2917 2918 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2919 2920 Example usage: 2921 2922 Consider the following 8x8 matrix with 34 non-zero values, that is 2923 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2924 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2925 as follows: 2926 2927 .vb 2928 1 2 0 | 0 3 0 | 0 4 2929 Proc0 0 5 6 | 7 0 0 | 8 0 2930 9 0 10 | 11 0 0 | 12 0 2931 ------------------------------------- 2932 13 0 14 | 15 16 17 | 0 0 2933 Proc1 0 18 0 | 19 20 21 | 0 0 2934 0 0 0 | 22 23 0 | 24 0 2935 ------------------------------------- 2936 Proc2 25 26 27 | 0 0 28 | 29 0 2937 30 0 0 | 31 32 33 | 0 34 2938 .ve 2939 2940 This can be represented as a collection of submatrices as: 2941 2942 .vb 2943 A B C 2944 D E F 2945 G H I 2946 .ve 2947 2948 where the submatrices A,B,C are owned by proc0, D,E,F are 2949 owned by proc1, and G,H,I are owned by proc2. 2950 2951 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2952 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2953 The 'M','N' parameters are 8,8, and have the same values on all procs. 2954 2955 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2956 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2957 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2958 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2959 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2960 matrix, and [DF] as another SeqAIJ matrix. 2961 2962 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2963 allocated for every row of the local diagonal submatrix, and o_nz 2964 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2965 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2966 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2967 In this case, the values of d_nz,o_nz are: 2968 .vb 2969 proc0 : d_nz = 2, o_nz = 2 2970 proc1 : d_nz = 3, o_nz = 2 2971 proc2 : d_nz = 1, o_nz = 4 2972 .ve 2973 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2974 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2975 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2976 34 values. 2977 2978 When the d_nnz, o_nnz parameters are specified, the storage is specified 2979 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2980 In the above case the values for d_nnz,o_nnz are: 2981 .vb 2982 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2983 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2984 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2985 .ve 2986 Here the space allocated is the sum of all the above values, i.e., 34, and 2987 hence the preallocation is perfect.
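A minimal sketch of the corresponding call on proc0 (illustrative variable names; each process passes its own arrays):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve

Because d_nnz and o_nnz are given, the d_nz and o_nz arguments (here 0) are ignored.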
2988 2989 Level: intermediate 2990 2991 .keywords: matrix, aij, compressed row, sparse, parallel 2992 2993 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2994 MPIAIJ 2995 @*/ 2996 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2997 { 2998 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2999 3000 PetscFunctionBegin; 3001 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3002 if (f) { 3003 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3010 /*@C 3011 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3012 in standard CSR format. 3013 3014 Collective on MPI_Comm 3015 3016 Input Parameters: 3017 + comm - MPI communicator 3018 . m - number of local rows (Cannot be PETSC_DECIDE) 3019 . n - This value should be the same as the local size used in creating the 3020 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 3021 it calculated if N is given). For square matrices n is almost always m. 3022 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3023 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3024 . i - row indices 3025 . j - column indices 3026 - a - matrix values 3027 3028 Output Parameter: 3029 . mat - the matrix 3030 3031 Level: intermediate 3032 3033 Notes: 3034 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3035 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3036 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3037 3038 The i and j indices are 0-based. 3039 3040 .keywords: matrix, aij, compressed row, sparse, parallel 3041 3042 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3043 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3044 @*/ 3045 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3046 { 3047 PetscErrorCode ierr; 3048 3049 PetscFunctionBegin; 3050 if (i[0]) { 3051 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3052 } 3053 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3054 ierr = MatCreate(comm,mat);CHKERRQ(ierr); ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3055 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3056 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3057 PetscFunctionReturn(0); 3058 } 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatCreateMPIAIJ" 3062 /*@C 3063 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3064 (the default parallel PETSc format). For good matrix assembly performance 3065 the user should preallocate the matrix storage by setting the parameters 3066 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3067 performance can be increased by more than a factor of 50. 3068 3069 Collective on MPI_Comm 3070 3071 Input Parameters: 3072 + comm - MPI communicator 3073 .
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3074 This value should be the same as the local size used in creating the 3075 y vector for the matrix-vector product y = Ax. 3076 . n - This value should be the same as the local size used in creating the 3077 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 3078 it calculated if N is given). For square matrices n is almost always m. 3079 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3080 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3081 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3082 (same value is used for all local rows) 3083 . d_nnz - array containing the number of nonzeros in the various rows of the 3084 DIAGONAL portion of the local submatrix (possibly different for each row) 3085 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3086 The size of this array is equal to the number of local rows, i.e., 'm'. 3087 You must leave room for the diagonal entry even if it is zero. 3088 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3089 submatrix (same value is used for all local rows). 3090 - o_nnz - array containing the number of nonzeros in the various rows of the 3091 OFF-DIAGONAL portion of the local submatrix (possibly different for 3092 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3093 structure. The size of this array is equal to the number 3094 of local rows, i.e., 'm'. 3095 3096 Output Parameter: 3097 . A - the matrix 3098 3099 Notes: 3100 If the *_nnz parameter is given then the *_nz parameter is ignored. 3101 3102 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3103 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3104 storage requirements for this matrix. 3105 3106 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3107 processor then it must be used on all processors that share the object for 3108 that argument. 3109 3110 The user MUST specify either the local or global matrix dimensions 3111 (possibly both). 3112 3113 The parallel matrix is partitioned such that the first m0 rows belong to 3114 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3115 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3116 3117 The DIAGONAL portion of the local submatrix of a processor can be defined 3118 as the submatrix which is obtained by extracting the part corresponding 3119 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3120 first row that belongs to the processor, and r2 is the last row belonging 3121 to this processor. This is a square mxm matrix. The remaining portion 3122 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3123 3124 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3125 3126 When calling this routine with a single process communicator, a matrix of 3127 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3128 type of communicator, use the construction mechanism (written out in the sketch below): 3129 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3130 3131 By default, this format uses inodes (identical nodes) when possible. 3132 We search for consecutive rows with the same nonzero structure, thereby 3133 reusing matrix information to achieve increased efficiency.
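Written out, that construction mechanism is (a sketch; substitute the desired sizes and preallocation parameters):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve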
3134 3135 Options Database Keys: 3136 + -mat_no_inode - Do not use inodes 3137 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3138 - -mat_aij_oneindex - Internally use indexing starting at 1 3139 rather than 0. Note that when calling MatSetValues(), 3140 the user still MUST index entries starting at 0! 3141 3142 3143 Example usage: 3144 3145 Consider the following 8x8 matrix with 34 nonzero values, that is 3146 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3147 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3148 as follows: 3149 3150 .vb 3151 1 2 0 | 0 3 0 | 0 4 3152 Proc0 0 5 6 | 7 0 0 | 8 0 3153 9 0 10 | 11 0 0 | 12 0 3154 ------------------------------------- 3155 13 0 14 | 15 16 17 | 0 0 3156 Proc1 0 18 0 | 19 20 21 | 0 0 3157 0 0 0 | 22 23 0 | 24 0 3158 ------------------------------------- 3159 Proc2 25 26 27 | 0 0 28 | 29 0 3160 30 0 0 | 31 32 33 | 0 34 3161 .ve 3162 3163 This can be represented as a collection of submatrices as: 3164 3165 .vb 3166 A B C 3167 D E F 3168 G H I 3169 .ve 3170 3171 Where the submatrices A,B,C are owned by proc0, D,E,F are 3172 owned by proc1, G,H,I are owned by proc2. 3173 3174 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3175 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3176 The 'M','N' parameters are 8,8, and have the same values on all procs. 3177 3178 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3179 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3180 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3181 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3182 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3183 matrix, and [DF] as another SeqAIJ matrix. 3184 3185 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3186 allocated for every row of the local diagonal submatrix, and o_nz 3187 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3188 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3189 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3190 In this case, the values of d_nz,o_nz are: 3191 .vb 3192 proc0 : d_nz = 2, o_nz = 2 3193 proc1 : d_nz = 3, o_nz = 2 3194 proc2 : d_nz = 1, o_nz = 4 3195 .ve 3196 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3197 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3198 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3199 34 values. 3200 3201 When the d_nnz, o_nnz parameters are specified, the storage is specified 3202 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3203 In the above case the values for d_nnz,o_nnz are: 3204 .vb 3205 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3206 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3207 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3208 .ve 3209 Here the space allocated is the sum of all the above values, i.e., 34, and 3210 hence the pre-allocation is perfect.
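   As a concrete sketch, the corresponding call on proc0 of a 3-process communicator
   comm, using the d_nnz and o_nnz values tabulated above (the d_nz and o_nz arguments
   are passed as 0 because they are ignored when the _nnz arrays are provided):

.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      Mat      A;
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve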
3211 3212 Level: intermediate 3213 3214 .keywords: matrix, aij, compressed row, sparse, parallel 3215 3216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3217 MPIAIJ, MatCreateMPIAIJWithArrays() 3218 @*/ 3219 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3220 { 3221 PetscErrorCode ierr; 3222 PetscMPIInt size; 3223 3224 PetscFunctionBegin; 3225 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3226 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3227 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3228 if (size > 1) { 3229 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3230 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3231 } else { 3232 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3233 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3234 } 3235 PetscFunctionReturn(0); 3236 } 3237 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3241 { 3242 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3243 3244 PetscFunctionBegin; 3245 *Ad = a->A; 3246 *Ao = a->B; 3247 *colmap = a->garray; 3248 PetscFunctionReturn(0); 3249 } 3250 3251 #undef __FUNCT__ 3252 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3253 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3254 { 3255 PetscErrorCode ierr; 3256 PetscInt i; 3257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3258 3259 PetscFunctionBegin; 3260 if (coloring->ctype == IS_COLORING_GLOBAL) { 3261 ISColoringValue *allcolors,*colors; 3262 ISColoring ocoloring; 3263 3264 /* set coloring for diagonal portion */ 3265 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3266 3267 /* set coloring for off-diagonal portion */ 3268 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3269 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3270 for (i=0; i<a->B->cmap.n; i++) { 3271 colors[i] = allcolors[a->garray[i]]; 3272 } 3273 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3274 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3275 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3276 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3277 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3278 ISColoringValue *colors; 3279 PetscInt *larray; 3280 ISColoring ocoloring; 3281 3282 /* set coloring for diagonal portion */ 3283 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3284 for (i=0; i<a->A->cmap.n; i++) { 3285 larray[i] = i + A->cmap.rstart; 3286 } 3287 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3288 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3289 for (i=0; i<a->A->cmap.n; i++) { 3290 colors[i] = coloring->colors[larray[i]]; 3291 } 3292 ierr = PetscFree(larray);CHKERRQ(ierr); 3293 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3294 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3295 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3296 3297 /* set coloring for off-diagonal portion */ 3298 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3299 ierr 
= ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3300 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3301 for (i=0; i<a->B->cmap.n; i++) { 3302 colors[i] = coloring->colors[larray[i]]; 3303 } 3304 ierr = PetscFree(larray);CHKERRQ(ierr); 3305 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3306 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3307 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3308 } else { 3309 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3310 } 3311 3312 PetscFunctionReturn(0); 3313 } 3314 3315 #if defined(PETSC_HAVE_ADIC) 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3318 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3319 { 3320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3321 PetscErrorCode ierr; 3322 3323 PetscFunctionBegin; 3324 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3325 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3326 PetscFunctionReturn(0); 3327 } 3328 #endif 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3332 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3333 { 3334 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3335 PetscErrorCode ierr; 3336 3337 PetscFunctionBegin; 3338 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3339 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3340 PetscFunctionReturn(0); 3341 } 3342 3343 #undef __FUNCT__ 3344 #define __FUNCT__ "MatMerge" 3345 /*@C 3346 MatMerge - Creates a single large PETSc matrix by concatenating sequential 3347 matrices from each processor 3348 3349 Collective on MPI_Comm 3350 3351 Input Parameters: 3352 + comm - the communicator the parallel matrix will live on 3353 . inmat - the input sequential matrix on each processor 3354 . n - number of local columns (or PETSC_DECIDE) 3355 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3356 3357 Output Parameter: 3358 . outmat - the parallel matrix generated 3359 3360 Level: advanced 3361 3362 Notes: The number of columns of the matrix in EACH processor MUST be the same.
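   A minimal usage sketch (assuming each process passes its own SeqAIJ matrix inmat,
   all with the same number of columns):

.vb
      Mat outmat;
      ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve

   Note that inmat is destroyed by this routine as part of the merge, so the caller
   must not destroy it again.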
3363 3364 @*/ 3365 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3366 { 3367 PetscErrorCode ierr; 3368 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3369 PetscInt *indx; 3370 PetscScalar *values; 3371 3372 PetscFunctionBegin; 3373 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3374 if (scall == MAT_INITIAL_MATRIX){ 3375 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3376 if (n == PETSC_DECIDE){ 3377 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3378 } 3379 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3380 rstart -= m; 3381 3382 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3383 for (i=0;i<m;i++) { 3384 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3385 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3386 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3387 } 3388 /* This routine will ONLY return MPIAIJ type matrix */ 3389 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3390 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3391 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3392 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3393 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3394 3395 } else if (scall == MAT_REUSE_MATRIX){ 3396 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3397 } else { 3398 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3399 } 3400 3401 for (i=0;i<m;i++) { 3402 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3403 Ii = i + rstart; 3404 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3405 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3406 } 3407 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3408 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3409 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3410 3411 PetscFunctionReturn(0); 3412 } 3413 3414 #undef __FUNCT__ 3415 #define __FUNCT__ "MatFileSplit" 3416 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3417 { 3418 PetscErrorCode ierr; 3419 PetscMPIInt rank; 3420 PetscInt m,N,i,rstart,nnz; 3421 size_t len; 3422 const PetscInt *indx; 3423 PetscViewer out; 3424 char *name; 3425 Mat B; 3426 const PetscScalar *values; 3427 3428 PetscFunctionBegin; 3429 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3430 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3431 /* Should this be the type of the diagonal block of A? 
*/ 3432 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3433 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3434 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3435 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3436 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3437 for (i=0;i<m;i++) { 3438 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3439 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3440 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3441 } 3442 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3443 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3444 3445 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3446 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3447 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3448 sprintf(name,"%s.%d",outfile,rank); 3449 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3450 ierr = PetscFree(name);CHKERRQ(ierr); 3451 ierr = MatView(B,out);CHKERRQ(ierr); 3452 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3453 ierr = MatDestroy(B);CHKERRQ(ierr); 3454 PetscFunctionReturn(0); 3455 } 3456 3457 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3458 #undef __FUNCT__ 3459 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3460 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3461 { 3462 PetscErrorCode ierr; 3463 Mat_Merge_SeqsToMPI *merge; 3464 PetscContainer container; 3465 3466 PetscFunctionBegin; 3467 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3468 if (container) { 3469 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3470 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3471 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3472 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3473 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3474 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3475 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3476 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3477 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3478 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3479 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3480 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3481 3482 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3483 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3484 ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */ } 3485 3486 3487 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3488 PetscFunctionReturn(0); 3489 } 3490 3491 #include "src/mat/utils/freespace.h" 3492 #include "petscbt.h" 3493 static PetscEvent logkey_seqstompinum = 0; 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3496 /*@C 3497 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 3498 matrices from each processor 3499 3500 Collective on MPI_Comm 3501 3502 Input Parameters: 3503 + comm - the communicator the parallel matrix will live on 3504 . seqmat - the input sequential matrix on each processor 3505 . m - number of local rows (or PETSC_DECIDE) 3506 . n - number of local columns (or PETSC_DECIDE) 3507 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3508 3509 Output Parameter: 3510 . mpimat - the parallel matrix generated 3511 3512 Level: advanced 3513 3514 Notes: 3515 The dimensions of the sequential matrix in each processor MUST be the same. 3516 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 3517 destroyed when mpimat is destroyed.
Call PetscObjectQuery() to access seqmat. 3518 @*/ 3519 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3520 { 3521 PetscErrorCode ierr; 3522 MPI_Comm comm=mpimat->comm; 3523 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3524 PetscMPIInt size,rank,taga,*len_s; 3525 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3526 PetscInt proc,m; 3527 PetscInt **buf_ri,**buf_rj; 3528 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3529 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3530 MPI_Request *s_waits,*r_waits; 3531 MPI_Status *status; 3532 MatScalar *aa=a->a,**abuf_r,*ba_i; 3533 Mat_Merge_SeqsToMPI *merge; 3534 PetscContainer container; 3535 3536 PetscFunctionBegin; 3537 if (!logkey_seqstompinum) { 3538 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3539 } 3540 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3541 3542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3544 3545 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3546 if (container) { 3547 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3548 } else { SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must have been created by MatMerge_SeqsToMPISymbolic()"); } 3549 bi = merge->bi; 3550 bj = merge->bj; 3551 buf_ri = merge->buf_ri; 3552 buf_rj = merge->buf_rj; 3553 3554 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3555 owners = merge->rowmap.range; 3556 len_s = merge->len_s; 3557 3558 /* send and recv matrix values */ 3559 /*-----------------------------*/ 3560 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3561 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3562 3563 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3564 for (proc=0,k=0; proc<size; proc++){ 3565 if (!len_s[proc]) continue; 3566 i = owners[proc]; 3567 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3568 k++; 3569 } 3570 3571 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3572 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3573 ierr = PetscFree(status);CHKERRQ(ierr); 3574 3575 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3576 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3577 3578 /* insert mat values of mpimat */ 3579 /*----------------------------*/ 3580 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3581 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3582 nextrow = buf_ri_k + merge->nrecv; 3583 nextai = nextrow + merge->nrecv; 3584 3585 for (k=0; k<merge->nrecv; k++){ 3586 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3587 nrows = *(buf_ri_k[k]); 3588 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 3589 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3590 } 3591 3592 /* set values of ba */ 3593 m = merge->rowmap.n; 3594 for (i=0; i<m; i++) { 3595 arow = owners[rank] + i; 3596 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3597 bnzi = bi[i+1] - bi[i]; 3598 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3599 3600 /* add local non-zero vals of this proc's seqmat into ba */ 3601 anzi = ai[arow+1] - ai[arow]; 3602 aj = a->j + ai[arow]; 3603 aa = a->a + ai[arow]; 3604 nextaj = 0; 3605 for (j=0; nextaj<anzi; j++){ 3606 if
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3607 ba_i[j] += aa[nextaj++]; 3608 } 3609 } 3610 3611 /* add received vals into ba */ 3612 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3613 /* i-th row */ 3614 if (i == *nextrow[k]) { 3615 anzi = *(nextai[k]+1) - *nextai[k]; 3616 aj = buf_rj[k] + *(nextai[k]); 3617 aa = abuf_r[k] + *(nextai[k]); 3618 nextaj = 0; 3619 for (j=0; nextaj<anzi; j++){ 3620 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3621 ba_i[j] += aa[nextaj++]; 3622 } 3623 } 3624 nextrow[k]++; nextai[k]++; 3625 } 3626 } 3627 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3628 } 3629 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3630 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 3632 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3633 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3634 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3635 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3636 PetscFunctionReturn(0); 3637 } 3638 3639 static PetscEvent logkey_seqstompisym = 0; 3640 #undef __FUNCT__ 3641 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3642 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3643 { 3644 PetscErrorCode ierr; 3645 Mat B_mpi; 3646 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3647 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3648 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3649 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3650 PetscInt len,proc,*dnz,*onz; 3651 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3652 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3653 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3654 MPI_Status *status; 3655 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3656 PetscBT lnkbt; 3657 Mat_Merge_SeqsToMPI *merge; 3658 PetscContainer container; 3659 3660 PetscFunctionBegin; 3661 if (!logkey_seqstompisym) { 3662 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3663 } 3664 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3665 3666 /* make sure it is a PETSc comm */ 3667 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3668 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3669 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3670 3671 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3672 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3673 3674 /* determine row ownership */ 3675 /*---------------------------------------------------------*/ 3676 merge->rowmap.n = m; 3677 merge->rowmap.N = M; 3678 merge->rowmap.bs = 1; 3679 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3680 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3681 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3682 3683 m = merge->rowmap.n; 3684 M = merge->rowmap.N; 3685 owners = merge->rowmap.range; 3686 3687 /* determine the number of messages to send, their lengths */ 3688 /*---------------------------------------------------------*/ 3689 len_s = merge->len_s; 3690 3691 len = 0; /* length of buf_si[] */ 3692 merge->nsend = 0; 3693 for (proc=0; proc<size; proc++){ 3694 len_si[proc] = 0; 3695 if (proc == rank){ 3696 len_s[proc] = 0; 3697 } else { 3698 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3699 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* 
num of rows to be sent to [proc] */ 3700 } 3701 if (len_s[proc]) { 3702 merge->nsend++; 3703 nrows = 0; 3704 for (i=owners[proc]; i<owners[proc+1]; i++){ 3705 if (ai[i+1] > ai[i]) nrows++; 3706 } 3707 len_si[proc] = 2*(nrows+1); 3708 len += len_si[proc]; 3709 } 3710 } 3711 3712 /* determine the number and length of messages to receive for ij-structure */ 3713 /*-------------------------------------------------------------------------*/ 3714 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3715 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3716 3717 /* post the Irecv of j-structure */ 3718 /*-------------------------------*/ 3719 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3720 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3721 3722 /* post the Isend of j-structure */ 3723 /*--------------------------------*/ 3724 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3725 sj_waits = si_waits + merge->nsend; 3726 3727 for (proc=0, k=0; proc<size; proc++){ 3728 if (!len_s[proc]) continue; 3729 i = owners[proc]; 3730 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3731 k++; 3732 } 3733 3734 /* receives and sends of j-structure are complete */ 3735 /*------------------------------------------------*/ 3736 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3737 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3738 3739 /* send and recv i-structure */ 3740 /*---------------------------*/ 3741 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3742 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3743 3744 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3745 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3746 for (proc=0,k=0; proc<size; proc++){ 3747 if (!len_s[proc]) continue; 3748 /* form outgoing message for i-structure: 3749 buf_si[0]: nrows to be sent 3750 [1:nrows]: row index (global) 3751 [nrows+1:2*nrows+1]: i-structure index 3752 */ 3753 /*-------------------------------------------*/ 3754 nrows = len_si[proc]/2 - 1; 3755 buf_si_i = buf_si + nrows+1; 3756 buf_si[0] = nrows; 3757 buf_si_i[0] = 0; 3758 nrows = 0; 3759 for (i=owners[proc]; i<owners[proc+1]; i++){ 3760 anzi = ai[i+1] - ai[i]; 3761 if (anzi) { 3762 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3763 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3764 nrows++; 3765 } 3766 } 3767 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3768 k++; 3769 buf_si += len_si[proc]; 3770 } 3771 3772 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3773 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3774 3775 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3776 for (i=0; i<merge->nrecv; i++){ 3777 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3778 } 3779 3780 ierr = PetscFree(len_si);CHKERRQ(ierr); 3781 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3782 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3783 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3784 ierr = 
PetscFree(ri_waits);CHKERRQ(ierr); 3785 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3786 ierr = PetscFree(status);CHKERRQ(ierr); 3787 3788 /* compute a local seq matrix in each processor */ 3789 /*----------------------------------------------*/ 3790 /* allocate bi array and free space for accumulating nonzero column info */ 3791 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3792 bi[0] = 0; 3793 3794 /* create and initialize a linked list */ 3795 nlnk = N+1; 3796 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3797 3798 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3799 len = 0; 3800 len = ai[owners[rank+1]] - ai[owners[rank]]; 3801 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3802 current_space = free_space; 3803 3804 /* determine symbolic info for each local row */ 3805 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3806 nextrow = buf_ri_k + merge->nrecv; 3807 nextai = nextrow + merge->nrecv; 3808 for (k=0; k<merge->nrecv; k++){ 3809 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3810 nrows = *buf_ri_k[k]; 3811 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3812 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3813 } 3814 3815 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3816 len = 0; 3817 for (i=0;i<m;i++) { 3818 bnzi = 0; 3819 /* add local non-zero cols of this proc's seqmat into lnk */ 3820 arow = owners[rank] + i; 3821 anzi = ai[arow+1] - ai[arow]; 3822 aj = a->j + ai[arow]; 3823 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3824 bnzi += nlnk; 3825 /* add received col data into lnk */ 3826 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3827 if (i == *nextrow[k]) { /* i-th row */ 3828 anzi = *(nextai[k]+1) - *nextai[k]; 3829 aj = buf_rj[k] + *nextai[k]; 3830 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3831 bnzi += nlnk; 3832 nextrow[k]++; nextai[k]++; 3833 } 3834 } 3835 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3836 3837 /* if free space is not available, make more free space */ 3838 if (current_space->local_remaining<bnzi) { 3839 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3840 nspacedouble++; 3841 } 3842 /* copy data into free space, then initialize lnk */ 3843 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3844 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3845 3846 current_space->array += bnzi; 3847 current_space->local_used += bnzi; 3848 current_space->local_remaining -= bnzi; 3849 3850 bi[i+1] = bi[i] + bnzi; 3851 } 3852 3853 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3854 3855 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3856 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3857 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3858 3859 /* create symbolic parallel matrix B_mpi */ 3860 /*---------------------------------------*/ 3861 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3862 if (n==PETSC_DECIDE) { 3863 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3864 } else { 3865 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3866 } 3867 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3868 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3869 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3870 3871 /* B_mpi 
is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3872 B_mpi->assembled = PETSC_FALSE; 3873 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3874 merge->bi = bi; 3875 merge->bj = bj; 3876 merge->buf_ri = buf_ri; 3877 merge->buf_rj = buf_rj; 3878 merge->coi = PETSC_NULL; 3879 merge->coj = PETSC_NULL; 3880 merge->owners_co = PETSC_NULL; 3881 3882 /* attach the supporting struct to B_mpi for reuse */ 3883 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3884 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3885 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3886 *mpimat = B_mpi; 3887 3888 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3889 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3890 PetscFunctionReturn(0); 3891 } 3892 3893 static PetscEvent logkey_seqstompi = 0; 3894 #undef __FUNCT__ 3895 #define __FUNCT__ "MatMerge_SeqsToMPI" 3896 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3897 { 3898 PetscErrorCode ierr; 3899 3900 PetscFunctionBegin; 3901 if (!logkey_seqstompi) { 3902 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3903 } 3904 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3905 if (scall == MAT_INITIAL_MATRIX){ 3906 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3907 } 3908 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3909 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3910 PetscFunctionReturn(0); 3911 } 3912 static PetscEvent logkey_getlocalmat = 0; 3913 #undef __FUNCT__ 3914 #define __FUNCT__ "MatGetLocalMat" 3915 /*@C 3916 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3917 3918 Not Collective 3919 3920 Input Parameters: 3921 + A - the matrix 3922 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3923 3924 Output Parameter: 3925 . 
A_loc - the local sequential matrix generated 3926 3927 Level: developer 3928 3929 @*/ 3930 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3931 { 3932 PetscErrorCode ierr; 3933 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3934 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3935 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3936 PetscScalar *aa=a->a,*ba=b->a,*ca; 3937 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3938 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3939 3940 PetscFunctionBegin; 3941 if (!logkey_getlocalmat) { 3942 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3943 } 3944 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3945 if (scall == MAT_INITIAL_MATRIX){ 3946 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3947 ci[0] = 0; 3948 for (i=0; i<am; i++){ 3949 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3950 } 3951 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3952 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3953 k = 0; 3954 for (i=0; i<am; i++) { 3955 ncols_o = bi[i+1] - bi[i]; 3956 ncols_d = ai[i+1] - ai[i]; 3957 /* off-diagonal portion of A */ 3958 for (jo=0; jo<ncols_o; jo++) { 3959 col = cmap[*bj]; 3960 if (col >= cstart) break; 3961 cj[k] = col; bj++; 3962 ca[k++] = *ba++; 3963 } 3964 /* diagonal portion of A */ 3965 for (j=0; j<ncols_d; j++) { 3966 cj[k] = cstart + *aj++; 3967 ca[k++] = *aa++; 3968 } 3969 /* off-diagonal portion of A */ 3970 for (j=jo; j<ncols_o; j++) { 3971 cj[k] = cmap[*bj++]; 3972 ca[k++] = *ba++; 3973 } 3974 } 3975 /* put together the new matrix */ 3976 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3977 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3978 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3979 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3980 mat->free_a = PETSC_TRUE; 3981 mat->free_ij = PETSC_TRUE; 3982 mat->nonew = 0; 3983 } else if (scall == MAT_REUSE_MATRIX){ 3984 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3985 ci = mat->i; cj = mat->j; ca = mat->a; 3986 for (i=0; i<am; i++) { 3987 /* off-diagonal portion of A */ 3988 ncols_o = bi[i+1] - bi[i]; 3989 for (jo=0; jo<ncols_o; jo++) { 3990 col = cmap[*bj]; 3991 if (col >= cstart) break; 3992 *ca++ = *ba++; bj++; 3993 } 3994 /* diagonal portion of A */ 3995 ncols_d = ai[i+1] - ai[i]; 3996 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3997 /* off-diagonal portion of A */ 3998 for (j=jo; j<ncols_o; j++) { 3999 *ca++ = *ba++; bj++; 4000 } 4001 } 4002 } else { 4003 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4004 } 4005 4006 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4007 PetscFunctionReturn(0); 4008 } 4009 4010 static PetscEvent logkey_getlocalmatcondensed = 0; 4011 #undef __FUNCT__ 4012 #define __FUNCT__ "MatGetLocalMatCondensed" 4013 /*@C 4014 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4015 4016 Not Collective 4017 4018 Input Parameters: 4019 + A - the matrix 4020 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4021 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4022 4023 Output Parameter: 4024 . 
A_loc - the local sequential matrix generated 4025 4026 Level: developer 4027 4028 @*/ 4029 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4030 { 4031 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4032 PetscErrorCode ierr; 4033 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4034 IS isrowa,iscola; 4035 Mat *aloc; 4036 4037 PetscFunctionBegin; 4038 if (!logkey_getlocalmatcondensed) { 4039 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4040 } 4041 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4042 if (!row){ 4043 start = A->rmap.rstart; end = A->rmap.rend; 4044 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4045 } else { 4046 isrowa = *row; 4047 } 4048 if (!col){ 4049 start = A->cmap.rstart; 4050 cmap = a->garray; 4051 nzA = a->A->cmap.n; 4052 nzB = a->B->cmap.n; 4053 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4054 ncols = 0; 4055 for (i=0; i<nzB; i++) { 4056 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4057 else break; 4058 } 4059 imark = i; 4060 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4061 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4062 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4063 ierr = PetscFree(idx);CHKERRQ(ierr); 4064 } else { 4065 iscola = *col; 4066 } 4067 if (scall != MAT_INITIAL_MATRIX){ 4068 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4069 aloc[0] = *A_loc; 4070 } 4071 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4072 *A_loc = aloc[0]; 4073 ierr = PetscFree(aloc);CHKERRQ(ierr); 4074 if (!row){ 4075 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4076 } 4077 if (!col){ 4078 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4079 } 4080 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4081 PetscFunctionReturn(0); 4082 } 4083 4084 static PetscEvent logkey_GetBrowsOfAcols = 0; 4085 #undef __FUNCT__ 4086 #define __FUNCT__ "MatGetBrowsOfAcols" 4087 /*@C 4088 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 4089 4090 Collective on Mat 4091 4092 Input Parameters: 4093 + A,B - the matrices in mpiaij format 4094 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4095 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4096 4097 Output Parameter: 4098 + rowb, colb - index sets of rows and columns of B to extract 4099 .
brstart - row index of B_seq from which the next B->rmap.n rows are taken from B's local rows 4100 - B_seq - the sequential matrix generated 4101 4102 Level: developer 4103 4104 @*/ 4105 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4106 { 4107 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4108 PetscErrorCode ierr; 4109 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4110 IS isrowb,iscolb; 4111 Mat *bseq; 4112 4113 PetscFunctionBegin; 4114 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4115 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4116 } 4117 if (!logkey_GetBrowsOfAcols) { 4118 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4119 } 4120 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4121 4122 if (scall == MAT_INITIAL_MATRIX){ 4123 start = A->cmap.rstart; 4124 cmap = a->garray; 4125 nzA = a->A->cmap.n; 4126 nzB = a->B->cmap.n; 4127 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4128 ncols = 0; 4129 for (i=0; i<nzB; i++) { /* row < local row index */ 4130 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4131 else break; 4132 } 4133 imark = i; 4134 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4135 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4136 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4137 ierr = PetscFree(idx);CHKERRQ(ierr); 4138 *brstart = imark; 4139 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4140 } else { 4141 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4142 isrowb = *rowb; iscolb = *colb; 4143 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4144 bseq[0] = *B_seq; 4145 } 4146 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4147 *B_seq = bseq[0]; 4148 ierr = PetscFree(bseq);CHKERRQ(ierr); 4149 if (!rowb){ 4150 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4151 } else { 4152 *rowb = isrowb; 4153 } 4154 if (!colb){ 4155 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4156 } else { 4157 *colb = iscolb; 4158 } 4159 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4160 PetscFunctionReturn(0); 4161 } 4162 4163 static PetscEvent logkey_GetBrowsOfAocols = 0; 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "MatGetBrowsOfAoCols" 4166 /*@C 4167 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4168 of the OFF-DIAGONAL portion of local A 4169 4170 Collective on Mat 4171 4172 Input Parameters: 4173 + A,B - the matrices in mpiaij format 4174 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4175 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4176 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4177 4178 Output Parameter: 4179 + B_oth - the sequential matrix generated 4180 4181 Level: developer 4182 4183 @*/ 4184 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4185 { 4186 VecScatter_MPI_General *gen_to,*gen_from; 4187 PetscErrorCode ierr; 4188 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4189 Mat_SeqAIJ *b_oth; 4190 VecScatter ctx=a->Mvctx; 4191 MPI_Comm comm=ctx->comm; 4192 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4193 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4194 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4195 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4196 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4197 MPI_Status *sstatus,rstatus; 4198 PetscInt *cols,sbs,rbs; 4199 PetscScalar *vals; 4200 4201 PetscFunctionBegin; 4202 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4203 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4204 } 4205 if (!logkey_GetBrowsOfAocols) { 4206 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4207 } 4208 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4209 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4210 4211 gen_to = (VecScatter_MPI_General*)ctx->todata; 4212 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4213 rvalues = gen_from->values; /* holds the length of receiving row */ 4214 svalues = gen_to->values; /* holds the length of sending row */ 4215 nrecvs = gen_from->n; 4216 nsends = gen_to->n; 4217 4218 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4219 srow = gen_to->indices; /* local row index to be sent */ 4220 sstarts = gen_to->starts; 4221 sprocs = gen_to->procs; 4222 sstatus = gen_to->sstatus; 4223 sbs = gen_to->bs; 4224 rstarts = gen_from->starts; 4225 rprocs = gen_from->procs; 4226 rbs = gen_from->bs; 4227 4228 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4229 if (scall == MAT_INITIAL_MATRIX){ 4230 /* i-array */ 4231 /*---------*/ 4232 /* post receives */ 4233 for (i=0; i<nrecvs; i++){ 4234 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4235 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4236 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4237 } 4238 4239 /* pack the outgoing message */ 4240 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4241 rstartsj = sstartsj + nsends +1; 4242 sstartsj[0] = 0; rstartsj[0] = 0; 4243 len = 0; /* total length of j or a array to be sent */ 4244 k = 0; 4245 for (i=0; i<nsends; i++){ 4246 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4247 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4248 for (j=0; j<nrows; j++) { 4249 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4250 for (l=0; l<sbs; l++){ 4251 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4252 rowlen[j*sbs+l] = ncols; 4253 len += ncols; 4254 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4255 } 4256 k++; 4257 } 4258 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4259 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4260 } 4261 /* recvs and sends of i-array are completed */ 4262 i = nrecvs; 4263 while (i--) { 4264 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4265 } 4266 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4267 4268 /* allocate buffers for sending j and a arrays */ 4269 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4270 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4271 4272 /* create i-array of B_oth */ 4273 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4274 b_othi[0] = 0; 4275 len = 0; /* total length of j or a array to be received */ 4276 k = 0; 4277 for (i=0; i<nrecvs; i++){ 4278 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4279 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4280 for (j=0; j<nrows; j++) { 4281 b_othi[k+1] = b_othi[k] + rowlen[j]; 4282 len += rowlen[j]; k++; 4283 } 4284 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4285 } 4286 4287 /* allocate space for j and a arrrays of B_oth */ 4288 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4289 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4290 4291 /* j-array */ 4292 /*---------*/ 4293 /* post receives of j-array */ 4294 for (i=0; i<nrecvs; i++){ 4295 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4296 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4297 } 4298 4299 /* pack the outgoing message j-array */ 4300 k = 0; 4301 for (i=0; i<nsends; i++){ 4302 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4303 bufJ = bufj+sstartsj[i]; 4304 for (j=0; j<nrows; j++) { 4305 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4306 for (ll=0; ll<sbs; ll++){ 4307 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4308 for (l=0; l<ncols; l++){ 4309 *bufJ++ = cols[l]; 4310 } 4311 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4312 } 4313 } 4314 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4315 } 4316 4317 /* recvs and sends of j-array are completed */ 4318 i = nrecvs; 4319 while (i--) { 4320 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4321 } 4322 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4323 } else if (scall == MAT_REUSE_MATRIX){ 4324 sstartsj = *startsj; 4325 rstartsj = sstartsj + nsends +1; 4326 bufa = *bufa_ptr; 4327 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4328 b_otha = b_oth->a; 4329 } else { 4330 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4331 } 4332 4333 /* a-array */ 4334 /*---------*/ 4335 /* post receives of a-array */ 4336 for (i=0; i<nrecvs; i++){ 4337 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4338 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4339 } 4340 4341 /* pack the outgoing message a-array */ 4342 k = 0; 4343 for (i=0; i<nsends; i++){ 4344 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4345 bufA = bufa+sstartsj[i]; 4346 for (j=0; j<nrows; j++) { 4347 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4348 for (ll=0; ll<sbs; ll++){ 4349 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4350 for (l=0; l<ncols; l++){ 4351 *bufA++ = vals[l]; 4352 } 4353 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4354 } 4355 } 4356 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4357 } 4358 /* recvs and sends of a-array are completed */ 4359 i = nrecvs; 4360 while (i--) { 4361 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4362 } 4363 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4364 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4365 4366 if (scall == MAT_INITIAL_MATRIX){ 4367 /* put together the new matrix */ 4368 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4369 4370 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4371 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4372 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4373 b_oth->free_a = PETSC_TRUE; 4374 b_oth->free_ij = PETSC_TRUE; 4375 b_oth->nonew = 0; 4376 4377 ierr = PetscFree(bufj);CHKERRQ(ierr); 4378 if (!startsj || !bufa_ptr){ 4379 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4380 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4381 } else { 4382 *startsj = sstartsj; 4383 *bufa_ptr = bufa; 4384 } 4385 } 4386 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4387 PetscFunctionReturn(0); 4388 } 4389 4390 #undef __FUNCT__ 4391 #define __FUNCT__ "MatGetCommunicationStructs" 4392 /*@C 4393 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4394 4395 Not Collective 4396 4397 Input Parameters: 4398 . A - The matrix in mpiaij format 4399 4400 Output Parameter: 4401 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4402 . colmap - A map from global column index to local index into lvec 4403 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4404 4405 Level: developer 4406 4407 @*/ 4408 #if defined (PETSC_USE_CTABLE) 4409 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4410 #else 4411 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4412 #endif 4413 { 4414 Mat_MPIAIJ *a; 4415 4416 PetscFunctionBegin; 4417 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4418 PetscValidPointer(lvec, 2) 4419 PetscValidPointer(colmap, 3) 4420 PetscValidPointer(multScatter, 4) 4421 a = (Mat_MPIAIJ *) A->data; 4422 if (lvec) *lvec = a->lvec; 4423 if (colmap) *colmap = a->colmap; 4424 if (multScatter) *multScatter = a->Mvctx; 4425 PetscFunctionReturn(0); 4426 } 4427 4428 EXTERN_C_BEGIN 4429 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4430 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4431 EXTERN_C_END 4432 4433 /*MC 4434 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4435 4436 Options Database Keys: 4437 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4438 4439 Level: beginner 4440 4441 .seealso: MatCreateMPIAIJ 4442 M*/ 4443 4444 EXTERN_C_BEGIN 4445 #undef __FUNCT__ 4446 #define __FUNCT__ "MatCreate_MPIAIJ" 4447 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4448 { 4449 Mat_MPIAIJ *b; 4450 PetscErrorCode ierr; 4451 PetscMPIInt size; 4452 4453 PetscFunctionBegin; 4454 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4455 4456 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4457 B->data = (void*)b; 4458 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4459 B->factor = 0; 4460 B->rmap.bs = 1; 4461 B->assembled = PETSC_FALSE; 4462 B->mapping = 0; 4463 4464 B->insertmode = NOT_SET_VALUES; 4465 b->size = size; 4466 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4467 4468 /* build cache for off array entries formed */ 4469 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4470 b->donotstash = PETSC_FALSE; 4471 b->colmap = 0; 4472 b->garray = 0; 4473 b->roworiented = PETSC_TRUE; 4474 4475 /* stuff used for matrix vector multiply */ 4476 b->lvec = PETSC_NULL; 4477 b->Mvctx = PETSC_NULL; 4478 4479 /* stuff for MatGetRow() */ 4480 b->rowindices = 0; 4481 b->rowvalues = 0; 4482 b->getrowactive = PETSC_FALSE; 4483 4484 4485 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4486 "MatStoreValues_MPIAIJ", 4487 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4488 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4489 "MatRetrieveValues_MPIAIJ", 4490 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4492 "MatGetDiagonalBlock_MPIAIJ", 4493 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4494 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4495 "MatIsTranspose_MPIAIJ", 4496 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4497 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4498 "MatMPIAIJSetPreallocation_MPIAIJ", 4499 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4500 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4501 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4502 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4503 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4504 "MatDiagonalScaleLocal_MPIAIJ", 4505 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4506 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4507 "MatConvert_MPIAIJ_MPICSRPERM", 4508 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4510 "MatConvert_MPIAIJ_MPICRL", 4511 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4512 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4513 PetscFunctionReturn(0); 4514 } 4515 EXTERN_C_END 4516 4517 #undef __FUNCT__ 4518 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4519 /*@C 4520 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4521 and "off-diagonal" part of the matrix in CSR format. 4522 4523 Collective on MPI_Comm 4524 4525 Input Parameters: 4526 + comm - MPI communicator 4527 . m - number of local rows (Cannot be PETSC_DECIDE) 4528 . n - This value should be the same as the local size used in creating the 4529 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4530 calculated if N is given) For square matrices n is almost always m. 4531 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4532 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4533 . i - row indices for "diagonal" portion of matrix 4534 . j - column indices 4535 . a - matrix values 4536 . oi - row indices for "off-diagonal" portion of matrix 4537 . oj - column indices 4538 - oa - matrix values 4539 4540 Output Parameter: 4541 . mat - the matrix 4542 4543 Level: advanced 4544 4545 Notes: 4546 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4547 4548 The i and j indices are 0 based 4549 4550 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4551 4552 4553 .keywords: matrix, aij, compressed row, sparse, parallel 4554 4555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4556 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4557 @*/ 4558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4559 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4560 { 4561 PetscErrorCode ierr; 4562 Mat_MPIAIJ *maij; 4563 4564 PetscFunctionBegin; 4565 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4566 if (i[0]) { 4567 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4568 } 4569 if (oi[0]) { 4570 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4571 } 4572 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4573 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4574 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4575 maij = (Mat_MPIAIJ*) (*mat)->data; 4576 maij->donotstash = PETSC_TRUE; 4577 (*mat)->preallocated = PETSC_TRUE; 4578 4579 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4580 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr); 4581 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr); 4582 4583 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4584 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4585 4586 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4587 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4588 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4589 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4590 4591 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4592 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4593 PetscFunctionReturn(0); 4594 } 4595 4596 /* 4597 Special version for direct calls from Fortran 4598 */ 4599 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4600 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4601 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4602 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4603 #endif 4604 4605 /* Change these macros so can be used in void function */ 4606 #undef CHKERRQ 4607 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4608 #undef SETERRQ2 4609 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4610 #undef SETERRQ 4611 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4612 4613 EXTERN_C_BEGIN 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "matsetvaluesmpiaij_" 4616 
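/*
   matsetvaluesmpiaij_() repeats the body of MatSetValues_MPIAIJ() so that Fortran
   can call it directly, bypassing the usual Fortran interface layer; because the
   routine returns void rather than a PetscErrorCode, the CHKERRQ()/SETERRQ()
   macros are redefined above to abort via CHKERRABORT() instead of returning.
*/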
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
    PetscInt   cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat         A = aij->A;
    Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
    PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    PetscScalar *aa = a->a;
    PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
    PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
    PetscScalar *ba = b->a;

    PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt    nonew = a->nonew;
    PetscScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed from b->a BEFORE ap2 is computed, since
                   DisAssemble_MPIAIJ() has replaced the B matrix and its arrays */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap.n;
              }
            } else col = in[j];
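            /* in[j] lies outside this process's diagonal block: col now holds
               either the local index into B (assembled case, via colmap) or
               the global column index (unassembled or disassembled case) */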
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            if (ignorezeroentries && v[i*n] == 0.0) continue;
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
          } else {
            if (ignorezeroentries && v[i] == 0.0) continue;
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
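/*
   Example usage of MatCreateMPIAIJWithSplitArrays() (a minimal sketch, not
   part of the library source; the 4 x 4 stencil and its values are made up
   for illustration).  Run on two processes, each owning 2 rows: dj holds
   LOCAL column indices for the diagonal block, oj holds GLOBAL column
   indices for the off-diagonal block, matching the two
   MatCreateSeqAIJWithArrays() calls above.  Because the arrays are not
   copied, they must stay valid until the matrix is destroyed.

      PetscMPIInt rank;
      Mat         mat;
      PetscInt    di[3] = {0,1,2},dj[2] = {0,1};
      PetscScalar da[2] = {2.0,2.0};
      PetscInt    oi[3] = {0,1,2},oj[2];
      PetscScalar oa[2] = {-1.0,-1.0};

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = rank ? 0 : 2;
      oj[1] = rank ? 1 : 3;
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,di,dj,da,oi,oj,oa,&mat);CHKERRQ(ierr);
      ...
      ierr  = MatDestroy(mat);CHKERRQ(ierr);
*/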