#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
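/*
   MatSetValuesRow_MPIAIJ copies one complete locally owned row whose values v[]
   are already ordered by global column.  For example (illustrative numbers only),
   if this process owns columns 6..9 and the row's nonzero pattern is {2,5 | 7,8 | 11}:
      v[0],v[1] -> off-diagonal block B (columns left of the diagonal block)
      v[2],v[3] -> diagonal block A
      v[4]      -> off-diagonal block B (columns right of the diagonal block)
   The counts come from the existing nonzero pattern, so the pattern must already be
   in place, and the routine assumes a square matrix (see the comment in the body).
*/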
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = a->ignorezeroentries; 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && 
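          /* col < 0: global column in[j] is not yet in the off-diagonal map; if new
             nonzeros are allowed we must disassemble so the entry can be inserted */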
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = 
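  /* the stash holds entries destined for other processes (set with MatSetValues());
     the scatter begun above ships them to their owners for MatAssemblyEnd_MPIAIJ() */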
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt 
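                 /* buffers for exchanging global row indices so that each process
                    learns which of its own rows it must zero */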
nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero 
the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = 
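         /* start the reverse scatter that will add the off-diagonal contributions
            (already in lvec) into yy; the communication overlaps the local A^T x below */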
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 
760 header[1] = mat->rmap.N; 761 header[2] = mat->cmap.N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->rmap.n; 767 range = mat->rmap.range; 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->rmap.n; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->rmap.n; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->rmap.n; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file 
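     (same per-rank protocol as the column indices above: process 0 writes its own
     buffer, then receives and writes each remaining process's values in rank order)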
*/ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == 
PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. */ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 943 if (!rank) { 944 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 945 } else { 946 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = mat->rmap.rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = mat->rmap.rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 987 } 988 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 989 ierr = MatDestroy(A);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatView_MPIAIJ" 996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 997 { 998 PetscErrorCode ierr; 999 PetscTruth iascii,isdraw,issocket,isbinary; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1004 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1006 if (iascii || isdraw || isbinary || issocket) { 1007 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1008 } else { 1009 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS 
colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = 
irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NO_NEW_NONZERO_LOCATIONS: 1207 case MAT_YES_NEW_NONZERO_LOCATIONS: 1208 case MAT_COLUMNS_UNSORTED: 1209 case MAT_COLUMNS_SORTED: 1210 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1211 case MAT_KEEP_ZEROED_ROWS: 1212 case MAT_NEW_NONZERO_LOCATION_ERR: 1213 case MAT_USE_INODES: 1214 case MAT_DO_NOT_USE_INODES: 1215 case MAT_IGNORE_ZERO_ENTRIES: 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1218 break; 1219 case MAT_ROW_ORIENTED: 1220 a->roworiented = PETSC_TRUE; 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROWS_SORTED: 1225 case MAT_ROWS_UNSORTED: 1226 case MAT_YES_NEW_DIAGONALS: 1227 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1228 break; 1229 case MAT_COLUMN_ORIENTED: 1230 a->roworiented = PETSC_FALSE; 1231 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1232 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1233 break; 1234 case MAT_IGNORE_OFF_PROC_ENTRIES: 1235 a->donotstash = PETSC_TRUE; 1236 break; 1237 case MAT_NO_NEW_DIAGONALS: 1238 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1239 case MAT_SYMMETRIC: 1240 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1241 break; 1242 case MAT_STRUCTURALLY_SYMMETRIC: 1243 case MAT_HERMITIAN: 1244 case MAT_SYMMETRY_ETERNAL: 1245 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_NOT_SYMMETRIC: 1248 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1249 case MAT_NOT_HERMITIAN: 1250 case MAT_NOT_SYMMETRY_ETERNAL: 1251 break; 1252 default: 1253 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIAIJ" 1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1266 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1267 PetscInt *cmap,*idx_p; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 1275 allocate enough space to hold information from the longest row. 
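        One PetscScalar and one PetscInt are needed per entry; mat->rowindices
        points into the tail of the same allocation.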
1276 */ 1277 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1278 PetscInt max = 1,tmp; 1279 for (i=0; i<matin->rmap.n; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1285 } 1286 1287 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1288 lrow = row - rstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]]; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!aij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1348 } 1349 aij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatNorm_MPIAIJ" 1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1356 { 1357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1358 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1359 PetscErrorCode ierr; 1360 PetscInt i,j,cstart = mat->cmap.rstart; 1361 PetscReal sum = 0.0; 1362 PetscScalar *v; 1363 1364 PetscFunctionBegin; 1365 if (aij->size == 1) { 1366 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1367 } else { 1368 if (type == NORM_FROBENIUS) { 1369 v = amat->a; 1370 for (i=0; i<amat->nz; i++) { 1371 #if defined(PETSC_USE_COMPLEX) 1372 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1373 #else 1374 sum += (*v)*(*v); v++; 1375 #endif 1376 } 1377 v = bmat->a; 1378 for (i=0; i<bmat->nz; i++) { 1379 #if defined(PETSC_USE_COMPLEX) 1380 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1381 #else 1382 sum += (*v)*(*v); v++; 1383 #endif 1384 } 1385 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1386 *norm = sqrt(*norm); 
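      /* the local sums of squares from both blocks were combined with MPI_SUM above;
         the Frobenius norm is the square root of that global total */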
1387 } else if (type == NORM_1) { /* max column norm */ 1388 PetscReal *tmp,*tmp2; 1389 PetscInt *jj,*garray = aij->garray; 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1392 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1393 *norm = 0.0; 1394 v = amat->a; jj = amat->j; 1395 for (j=0; j<amat->nz; j++) { 1396 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1397 } 1398 v = bmat->a; jj = bmat->j; 1399 for (j=0; j<bmat->nz; j++) { 1400 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1401 } 1402 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1403 for (j=0; j<mat->cmap.N; j++) { 1404 if (tmp2[j] > *norm) *norm = tmp2[j]; 1405 } 1406 ierr = PetscFree(tmp);CHKERRQ(ierr); 1407 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1408 } else if (type == NORM_INFINITY) { /* max row norm */ 1409 PetscReal ntemp = 0.0; 1410 for (j=0; j<aij->A->rmap.n; j++) { 1411 v = amat->a + amat->i[j]; 1412 sum = 0.0; 1413 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1414 sum += PetscAbsScalar(*v); v++; 1415 } 1416 v = bmat->a + bmat->i[j]; 1417 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1418 sum += PetscAbsScalar(*v); v++; 1419 } 1420 if (sum > ntemp) ntemp = sum; 1421 } 1422 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1423 } else { 1424 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1425 } 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatTranspose_MPIAIJ" 1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1433 { 1434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1435 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1436 PetscErrorCode ierr; 1437 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1438 PetscInt cstart=A->cmap.rstart,ncol; 1439 Mat B; 1440 PetscScalar *array; 1441 1442 PetscFunctionBegin; 1443 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1444 1445 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1446 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1447 ai = Aloc->i; aj = Aloc->j; 1448 bi = Bloc->i; bj = Bloc->j; 1449 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1450 cols = d_nnz + na + 1; /* work space to be used by B part */ 1451 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1452 for (i=0; i<ai[ma]; i++){ 1453 d_nnz[aj[i]] ++; 1454 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1455 } 1456 1457 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1458 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1459 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1460 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1461 1462 /* copy over the A part */ 1463 array = Aloc->a; 1464 row = A->rmap.rstart; 1465 for (i=0; i<ma; i++) { 1466 ncol = ai[i+1]-ai[i]; 1467 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1468 row++; array += ncol; aj += ncol; 1469 } 1470 aj = Aloc->j; 1471 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1472 1473 /* copy over the B part */ 1474 array = Bloc->a; 1475 row = A->rmap.rstart; 1476 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1477 for (i=0; i<mb; i++) { 1478 ncol = bi[i+1]-bi[i]; 1479 ierr = 
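    /* insert this local row of the off-diagonal block as a column of the transpose,
       using the global column indices gathered into cols[] from garray */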
MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1480 row++; array += ncol; cols += ncol; 1481 } 1482 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1483 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1484 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1485 if (matout) { 1486 *matout = B; 1487 } else { 1488 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1489 } 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1495 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1496 { 1497 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1498 Mat a = aij->A,b = aij->B; 1499 PetscErrorCode ierr; 1500 PetscInt s1,s2,s3; 1501 1502 PetscFunctionBegin; 1503 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1504 if (rr) { 1505 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1506 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1507 /* Overlap communication with computation. */ 1508 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 } 1510 if (ll) { 1511 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1512 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1513 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1514 } 1515 /* scale the diagonal block */ 1516 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1517 1518 if (rr) { 1519 /* Do a scatter end and then right scale the off-diagonal block */ 1520 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1522 } 1523 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1529 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1530 { 1531 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1532 PetscErrorCode ierr; 1533 1534 PetscFunctionBegin; 1535 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1536 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1541 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1542 { 1543 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatEqual_MPIAIJ" 1553 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1554 { 1555 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1556 Mat a,b,c,d; 1557 PetscTruth flg; 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 a = matA->A; b = matA->B; 1562 c = matB->A; d = matB->B; 1563 1564 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1565 if (flg) { 1566 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1567 } 1568 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatCopy_MPIAIJ" 1574 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1575 { 1576 PetscErrorCode ierr; 1577 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1578 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1579 1580 PetscFunctionBegin; 1581 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
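     In that case fall back to the generic MatCopy_Basic() used below.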
*/ 1582 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1583 /* because of the column compression in the off-processor part of the matrix a->B, 1584 the number of columns in a->B and b->B may be different, hence we cannot call 1585 the MatCopy() directly on the two parts. If need be, we can provide a more 1586 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1587 then copying the submatrices */ 1588 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1589 } else { 1590 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1591 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1592 } 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1598 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1599 { 1600 PetscErrorCode ierr; 1601 1602 PetscFunctionBegin; 1603 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1604 PetscFunctionReturn(0); 1605 } 1606 1607 #include "petscblaslapack.h" 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatAXPY_MPIAIJ" 1610 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1611 { 1612 PetscErrorCode ierr; 1613 PetscInt i; 1614 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1615 PetscBLASInt bnz,one=1; 1616 Mat_SeqAIJ *x,*y; 1617 1618 PetscFunctionBegin; 1619 if (str == SAME_NONZERO_PATTERN) { 1620 PetscScalar alpha = a; 1621 x = (Mat_SeqAIJ *)xx->A->data; 1622 y = (Mat_SeqAIJ *)yy->A->data; 1623 bnz = (PetscBLASInt)x->nz; 1624 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1625 x = (Mat_SeqAIJ *)xx->B->data; 1626 y = (Mat_SeqAIJ *)yy->B->data; 1627 bnz = (PetscBLASInt)x->nz; 1628 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1629 } else if (str == SUBSET_NONZERO_PATTERN) { 1630 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1631 1632 x = (Mat_SeqAIJ *)xx->B->data; 1633 y = (Mat_SeqAIJ *)yy->B->data; 1634 if (y->xtoy && y->XtoY != xx->B) { 1635 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1636 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1637 } 1638 if (!y->xtoy) { /* get xtoy */ 1639 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1640 y->XtoY = xx->B; 1641 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1642 } 1643 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1644 } else { 1645 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatConjugate_MPIAIJ" 1654 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1655 { 1656 #if defined(PETSC_USE_COMPLEX) 1657 PetscErrorCode ierr; 1658 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1659 1660 PetscFunctionBegin; 1661 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1662 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1663 #else 1664 PetscFunctionBegin; 1665 #endif 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatRealPart_MPIAIJ" 1671 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1672 { 1673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1674 PetscErrorCode ierr; 1675 1676 PetscFunctionBegin; 1677 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1678 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1684 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1685 { 1686 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1691 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #ifdef PETSC_HAVE_PBGL 1696 1697 #include <boost/parallel/mpi/bsp_process_group.hpp> 1698 #include <boost/graph/distributed/ilu_default_graph.hpp> 1699 #include <boost/graph/distributed/ilu_0_block.hpp> 1700 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1701 #include <boost/graph/distributed/petsc/interface.hpp> 1702 #include <boost/multi_array.hpp> 1703 #include <boost/parallel/distributed_property_map.hpp> 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1707 /* 1708 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1709 */ 1710 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1711 { 1712 namespace petsc = boost::distributed::petsc; 1713 1714 namespace graph_dist = boost::graph::distributed; 1715 using boost::graph::distributed::ilu_default::process_group_type; 1716 using boost::graph::ilu_permuted; 1717 1718 PetscTruth row_identity, col_identity; 1719 PetscContainer c; 1720 PetscInt m, n, M, N; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1725 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1726 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1727 if (!row_identity || !col_identity) { 1728 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1729 } 1730 1731 process_group_type pg; 1732 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1733 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1734 lgraph_type& level_graph = *lgraph_p; 1735 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1736 1737 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1738 ilu_permuted(level_graph); 1739 1740 /* put together the new matrix */ 1741 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1742 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1743 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1744 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1745 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1746 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1747 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1748 (*fact)->factor = FACTOR_LU; 1749 1750 ierr = PetscContainerCreate(A->comm, &c); 1751 ierr = PetscContainerSetPointer(c, lgraph_p); 1752 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1758 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1759 { 1760 PetscFunctionBegin; 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatSolve_MPIAIJ" 1766 /* 1767 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1768 */ 1769 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1770 { 1771 namespace graph_dist = boost::graph::distributed; 1772 1773 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1774 lgraph_type* lgraph_p; 1775 PetscContainer c; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 ierr = 
PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1780 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1781 ierr = VecCopy(b, x); CHKERRQ(ierr); 1782 1783 PetscScalar* array_x; 1784 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1785 PetscInt sx; 1786 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1787 1788 PetscScalar* array_b; 1789 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1790 PetscInt sb; 1791 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1792 1793 lgraph_type& level_graph = *lgraph_p; 1794 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1795 1796 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1797 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1798 ref_x(array_x, boost::extents[num_vertices(graph)]); 1799 1800 typedef boost::iterator_property_map<array_ref_type::iterator, 1801 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1802 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1803 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1804 1805 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1806 1807 PetscFunctionReturn(0); 1808 } 1809 #endif 1810 1811 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1812 PetscInt nzlocal,nsends,nrecvs; 1813 PetscMPIInt *send_rank; 1814 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1815 PetscScalar *sbuf_a,**rbuf_a; 1816 PetscErrorCode (*MatDestroy)(Mat); 1817 } Mat_Redundant; 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1821 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1822 { 1823 PetscErrorCode ierr; 1824 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1825 PetscInt i; 1826 1827 PetscFunctionBegin; 1828 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1829 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1830 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1831 for (i=0; i<redund->nrecvs; i++){ 1832 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1833 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1834 } 1835 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1836 ierr = PetscFree(redund);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatDestroy_MatRedundant" 1842 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1843 { 1844 PetscErrorCode ierr; 1845 PetscContainer container; 1846 Mat_Redundant *redund=PETSC_NULL; 1847 1848 PetscFunctionBegin; 1849 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1850 if (container) { 1851 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1852 } else { 1853 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1854 } 1855 A->ops->destroy = redund->MatDestroy; 1856 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1857 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1858 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1864 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1865 { 1866 PetscMPIInt rank,size; 1867 MPI_Comm comm=mat->comm; 1868 PetscErrorCode ierr; 1869 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1870 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1871 PetscInt 
*rowrange=mat->rmap.range; 1872 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1873 Mat A=aij->A,B=aij->B,C=*matredundant; 1874 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1875 PetscScalar *sbuf_a; 1876 PetscInt nzlocal=a->nz+b->nz; 1877 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1878 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1879 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1880 PetscScalar *vals,*aworkA,*aworkB; 1881 PetscMPIInt tag1,tag2,tag3,imdex; 1882 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1883 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1884 MPI_Status recv_status,*send_status; 1885 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1886 PetscInt **rbuf_j=PETSC_NULL; 1887 PetscScalar **rbuf_a=PETSC_NULL; 1888 Mat_Redundant *redund=PETSC_NULL; 1889 PetscContainer container; 1890 1891 PetscFunctionBegin; 1892 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1893 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1894 1895 if (reuse == MAT_REUSE_MATRIX) { 1896 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1897 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1898 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1899 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1900 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1901 if (container) { 1902 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1903 } else { 1904 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1905 } 1906 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 1907 1908 nsends = redund->nsends; 1909 nrecvs = redund->nrecvs; 1910 send_rank = redund->send_rank; recv_rank = send_rank + size; 1911 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1912 sbuf_j = redund->sbuf_j; 1913 sbuf_a = redund->sbuf_a; 1914 rbuf_j = redund->rbuf_j; 1915 rbuf_a = redund->rbuf_a; 1916 } 1917 1918 if (reuse == MAT_INITIAL_MATRIX){ 1919 PetscMPIInt subrank,subsize; 1920 PetscInt nleftover,np_subcomm; 1921 /* get the destination processors' id send_rank, nsends and nrecvs */ 1922 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1923 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1924 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1925 recv_rank = send_rank + size; 1926 np_subcomm = size/nsubcomm; 1927 nleftover = size - nsubcomm*np_subcomm; 1928 nsends = 0; nrecvs = 0; 1929 for (i=0; i<size; i++){ /* i=rank*/ 1930 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1931 send_rank[nsends] = i; nsends++; 1932 recv_rank[nrecvs++] = i; 1933 } 1934 } 1935 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1936 i = size-nleftover-1; 1937 j = 0; 1938 while (j < nsubcomm - nleftover){ 1939 send_rank[nsends++] = i; 1940 i--; j++; 1941 } 1942 } 1943 1944 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1945 for (i=0; i<nleftover; i++){ 1946 recv_rank[nrecvs++] = size-nleftover+i; 1947 } 1948 } 1949 1950 /* allocate sbuf_j, sbuf_a */ 1951 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1952 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1953 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1954 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1955 1956 /* copy mat's local entries into the buffers */ 1957 if (reuse == MAT_INITIAL_MATRIX){ 1958 rownz_max = 0; 1959 rptr = sbuf_j; 1960 cols = sbuf_j + rend-rstart + 1; 1961 vals = sbuf_a; 1962 rptr[0] = 0; 1963 for (i=0; i<rend-rstart; i++){ 1964 row = i + rstart; 1965 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1966 ncols = nzA + nzB; 1967 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1968 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1969 /* load the column indices for this row into cols */ 1970 lwrite = 0; 1971 for (l=0; l<nzB; l++) { 1972 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1973 vals[lwrite] = aworkB[l]; 1974 cols[lwrite++] = ctmp; 1975 } 1976 } 1977 for (l=0; l<nzA; l++){ 1978 vals[lwrite] = aworkA[l]; 1979 cols[lwrite++] = cstart + cworkA[l]; 1980 } 1981 for (l=0; l<nzB; l++) { 1982 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1983 vals[lwrite] = aworkB[l]; 1984 cols[lwrite++] = ctmp; 1985 } 1986 } 1987 vals += ncols; 1988 cols += ncols; 1989 rptr[i+1] = rptr[i] + ncols; 1990 if (rownz_max < ncols) rownz_max = ncols; 1991 } 1992 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1993 } else { /* only copy matrix values into sbuf_a */ 1994 rptr = sbuf_j; 1995 vals = sbuf_a; 1996 rptr[0] = 0; 1997 for (i=0; i<rend-rstart; i++){ 1998 row = i + rstart; 1999 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2000 ncols = nzA + nzB; 2001 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2002 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2003 lwrite = 0; 2004 for (l=0; l<nzB; l++) { 2005 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2006 } 2007 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2008 for (l=0; l<nzB; l++) { 
2009 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2010 } 2011 vals += ncols; 2012 rptr[i+1] = rptr[i] + ncols; 2013 } 2014 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2015 2016 /* send nzlocal to others, and recv other's nzlocal */ 2017 /*--------------------------------------------------*/ 2018 if (reuse == MAT_INITIAL_MATRIX){ 2019 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2020 s_waits2 = s_waits3 + nsends; 2021 s_waits1 = s_waits2 + nsends; 2022 r_waits1 = s_waits1 + nsends; 2023 r_waits2 = r_waits1 + nrecvs; 2024 r_waits3 = r_waits2 + nrecvs; 2025 } else { 2026 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2027 r_waits3 = s_waits3 + nsends; 2028 } 2029 2030 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2031 if (reuse == MAT_INITIAL_MATRIX){ 2032 /* get new tags to keep the communication clean */ 2033 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2034 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2035 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2036 rbuf_nz = sbuf_nz + nsends; 2037 2038 /* post receives of other's nzlocal */ 2039 for (i=0; i<nrecvs; i++){ 2040 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2041 } 2042 /* send nzlocal to others */ 2043 for (i=0; i<nsends; i++){ 2044 sbuf_nz[i] = nzlocal; 2045 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2046 } 2047 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2048 count = nrecvs; 2049 while (count) { 2050 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2051 recv_rank[imdex] = recv_status.MPI_SOURCE; 2052 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2053 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2054 2055 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2056 rbuf_nz[imdex] += i + 2; 2057 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2058 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2059 count--; 2060 } 2061 /* wait on sends of nzlocal */ 2062 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2063 /* send mat->i,j to others, and recv from other's */ 2064 /*------------------------------------------------*/ 2065 for (i=0; i<nsends; i++){ 2066 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2067 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2068 } 2069 /* wait on receives of mat->i,j */ 2070 /*------------------------------*/ 2071 count = nrecvs; 2072 while (count) { 2073 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2074 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2075 count--; 2076 } 2077 /* wait on sends of mat->i,j */ 2078 /*---------------------------*/ 2079 if (nsends) { 2080 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2081 } 2082 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2083 2084 /* post receives, send and receive mat->a */ 2085 /*----------------------------------------*/ 
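  /* the numerical values change on every call, so they are exchanged unconditionally below;
     only the index (mat->i,j) exchange above is restricted to the MAT_INITIAL_MATRIX case */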
2086 for (imdex=0; imdex<nrecvs; imdex++) { 2087 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2088 } 2089 for (i=0; i<nsends; i++){ 2090 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2091 } 2092 count = nrecvs; 2093 while (count) { 2094 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2095 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2096 count--; 2097 } 2098 if (nsends) { 2099 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2100 } 2101 2102 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2103 2104 /* create redundant matrix */ 2105 /*-------------------------*/ 2106 if (reuse == MAT_INITIAL_MATRIX){ 2107 /* compute rownz_max for preallocation */ 2108 for (imdex=0; imdex<nrecvs; imdex++){ 2109 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2110 rptr = rbuf_j[imdex]; 2111 for (i=0; i<j; i++){ 2112 ncols = rptr[i+1] - rptr[i]; 2113 if (rownz_max < ncols) rownz_max = ncols; 2114 } 2115 } 2116 2117 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2118 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2119 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2120 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2122 } else { 2123 C = *matredundant; 2124 } 2125 2126 /* insert local matrix entries */ 2127 rptr = sbuf_j; 2128 cols = sbuf_j + rend-rstart + 1; 2129 vals = sbuf_a; 2130 for (i=0; i<rend-rstart; i++){ 2131 row = i + rstart; 2132 ncols = rptr[i+1] - rptr[i]; 2133 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2134 vals += ncols; 2135 cols += ncols; 2136 } 2137 /* insert received matrix entries */ 2138 for (imdex=0; imdex<nrecvs; imdex++){ 2139 rstart = rowrange[recv_rank[imdex]]; 2140 rend = rowrange[recv_rank[imdex]+1]; 2141 rptr = rbuf_j[imdex]; 2142 cols = rbuf_j[imdex] + rend-rstart + 1; 2143 vals = rbuf_a[imdex]; 2144 for (i=0; i<rend-rstart; i++){ 2145 row = i + rstart; 2146 ncols = rptr[i+1] - rptr[i]; 2147 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2148 vals += ncols; 2149 cols += ncols; 2150 } 2151 } 2152 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2153 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2155 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2156 if (reuse == MAT_INITIAL_MATRIX){ 2157 PetscContainer container; 2158 *matredundant = C; 2159 /* create a supporting struct and attach it to C for reuse */ 2160 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 2161 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2162 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2163 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2164 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2165 2166 redund->nzlocal = nzlocal; 2167 redund->nsends = nsends; 2168 redund->nrecvs = nrecvs; 2169 redund->send_rank = send_rank; 2170 redund->sbuf_nz = sbuf_nz; 2171 redund->sbuf_j = sbuf_j; 2172 redund->sbuf_a = sbuf_a; 2173 redund->rbuf_j = rbuf_j; 
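    /* keep (do not free) the send/receive buffers so that a later call with MAT_REUSE_MATRIX
       can skip the index exchange and reuse the same communication pattern */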
2174 redund->rbuf_a = rbuf_a; 2175 2176 redund->MatDestroy = C->ops->destroy; 2177 C->ops->destroy = MatDestroy_MatRedundant; 2178 } 2179 PetscFunctionReturn(0); 2180 } 2181 2182 /* -------------------------------------------------------------------*/ 2183 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2184 MatGetRow_MPIAIJ, 2185 MatRestoreRow_MPIAIJ, 2186 MatMult_MPIAIJ, 2187 /* 4*/ MatMultAdd_MPIAIJ, 2188 MatMultTranspose_MPIAIJ, 2189 MatMultTransposeAdd_MPIAIJ, 2190 #ifdef PETSC_HAVE_PBGL 2191 MatSolve_MPIAIJ, 2192 #else 2193 0, 2194 #endif 2195 0, 2196 0, 2197 /*10*/ 0, 2198 0, 2199 0, 2200 MatRelax_MPIAIJ, 2201 MatTranspose_MPIAIJ, 2202 /*15*/ MatGetInfo_MPIAIJ, 2203 MatEqual_MPIAIJ, 2204 MatGetDiagonal_MPIAIJ, 2205 MatDiagonalScale_MPIAIJ, 2206 MatNorm_MPIAIJ, 2207 /*20*/ MatAssemblyBegin_MPIAIJ, 2208 MatAssemblyEnd_MPIAIJ, 2209 0, 2210 MatSetOption_MPIAIJ, 2211 MatZeroEntries_MPIAIJ, 2212 /*25*/ MatZeroRows_MPIAIJ, 2213 0, 2214 #ifdef PETSC_HAVE_PBGL 2215 MatLUFactorNumeric_MPIAIJ, 2216 #else 2217 0, 2218 #endif 2219 0, 2220 0, 2221 /*30*/ MatSetUpPreallocation_MPIAIJ, 2222 #ifdef PETSC_HAVE_PBGL 2223 MatILUFactorSymbolic_MPIAIJ, 2224 #else 2225 0, 2226 #endif 2227 0, 2228 0, 2229 0, 2230 /*35*/ MatDuplicate_MPIAIJ, 2231 0, 2232 0, 2233 0, 2234 0, 2235 /*40*/ MatAXPY_MPIAIJ, 2236 MatGetSubMatrices_MPIAIJ, 2237 MatIncreaseOverlap_MPIAIJ, 2238 MatGetValues_MPIAIJ, 2239 MatCopy_MPIAIJ, 2240 /*45*/ 0, 2241 MatScale_MPIAIJ, 2242 0, 2243 0, 2244 0, 2245 /*50*/ MatSetBlockSize_MPIAIJ, 2246 0, 2247 0, 2248 0, 2249 0, 2250 /*55*/ MatFDColoringCreate_MPIAIJ, 2251 0, 2252 MatSetUnfactored_MPIAIJ, 2253 MatPermute_MPIAIJ, 2254 0, 2255 /*60*/ MatGetSubMatrix_MPIAIJ, 2256 MatDestroy_MPIAIJ, 2257 MatView_MPIAIJ, 2258 0, 2259 0, 2260 /*65*/ 0, 2261 0, 2262 0, 2263 0, 2264 0, 2265 /*70*/ 0, 2266 0, 2267 MatSetColoring_MPIAIJ, 2268 #if defined(PETSC_HAVE_ADIC) 2269 MatSetValuesAdic_MPIAIJ, 2270 #else 2271 0, 2272 #endif 2273 MatSetValuesAdifor_MPIAIJ, 2274 /*75*/ 0, 2275 0, 2276 0, 2277 0, 2278 0, 2279 /*80*/ 0, 2280 0, 2281 0, 2282 0, 2283 /*84*/ MatLoad_MPIAIJ, 2284 0, 2285 0, 2286 0, 2287 0, 2288 0, 2289 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2290 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2291 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2292 MatPtAP_Basic, 2293 MatPtAPSymbolic_MPIAIJ, 2294 /*95*/ MatPtAPNumeric_MPIAIJ, 2295 0, 2296 0, 2297 0, 2298 0, 2299 /*100*/0, 2300 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2301 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2302 MatConjugate_MPIAIJ, 2303 0, 2304 /*105*/MatSetValuesRow_MPIAIJ, 2305 MatRealPart_MPIAIJ, 2306 MatImaginaryPart_MPIAIJ, 2307 0, 2308 0, 2309 /*110*/0, 2310 MatGetRedundantMatrix_MPIAIJ}; 2311 2312 /* ----------------------------------------------------------------------------------------*/ 2313 2314 EXTERN_C_BEGIN 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2317 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2318 { 2319 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2320 PetscErrorCode ierr; 2321 2322 PetscFunctionBegin; 2323 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2324 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 EXTERN_C_END 2328 2329 EXTERN_C_BEGIN 2330 #undef __FUNCT__ 2331 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2332 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2333 { 2334 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2335 PetscErrorCode ierr; 2336 2337 PetscFunctionBegin; 2338 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2339 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 EXTERN_C_END 2343 2344 #include "petscpc.h" 2345 EXTERN_C_BEGIN 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2348 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2349 { 2350 Mat_MPIAIJ *b; 2351 PetscErrorCode ierr; 2352 PetscInt i; 2353 2354 PetscFunctionBegin; 2355 B->preallocated = PETSC_TRUE; 2356 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2357 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2358 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2359 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2360 2361 B->rmap.bs = B->cmap.bs = 1; 2362 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2363 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2364 if (d_nnz) { 2365 for (i=0; i<B->rmap.n; i++) { 2366 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2367 } 2368 } 2369 if (o_nnz) { 2370 for (i=0; i<B->rmap.n; i++) { 2371 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2372 } 2373 } 2374 b = (Mat_MPIAIJ*)B->data; 2375 2376 /* Explicitly create 2 MATSEQAIJ matrices. */ 2377 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2378 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2379 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2380 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2381 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2382 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2383 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2384 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2385 2386 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2387 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2388 2389 PetscFunctionReturn(0); 2390 } 2391 EXTERN_C_END 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2395 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2396 { 2397 Mat mat; 2398 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 *newmat = 0; 2403 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2404 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2405 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2406 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2407 a = (Mat_MPIAIJ*)mat->data; 2408 2409 mat->factor = matin->factor; 2410 mat->rmap.bs = matin->rmap.bs; 2411 mat->assembled = PETSC_TRUE; 2412 mat->insertmode = NOT_SET_VALUES; 2413 mat->preallocated = PETSC_TRUE; 2414 2415 a->size = oldmat->size; 2416 a->rank = oldmat->rank; 2417 a->donotstash = oldmat->donotstash; 2418 a->roworiented = oldmat->roworiented; 2419 a->rowindices = 0; 2420 a->rowvalues = 0; 2421 a->getrowactive = PETSC_FALSE; 2422 2423 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2424 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2425 2426 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2427 if (oldmat->colmap) { 2428 #if defined (PETSC_USE_CTABLE) 2429 ierr = 
PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2430 #else 2431 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2432 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2433 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2434 #endif 2435 } else a->colmap = 0; 2436 if (oldmat->garray) { 2437 PetscInt len; 2438 len = oldmat->B->cmap.n; 2439 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2440 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2441 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2442 } else a->garray = 0; 2443 2444 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2445 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2446 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2447 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2448 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2449 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2450 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2451 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2452 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2453 *newmat = mat; 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #include "petscsys.h" 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatLoad_MPIAIJ" 2461 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2462 { 2463 Mat A; 2464 PetscScalar *vals,*svals; 2465 MPI_Comm comm = ((PetscObject)viewer)->comm; 2466 MPI_Status status; 2467 PetscErrorCode ierr; 2468 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2469 PetscInt i,nz,j,rstart,rend,mmax; 2470 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2471 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2472 PetscInt cend,cstart,n,*rowners; 2473 int fd; 2474 2475 PetscFunctionBegin; 2476 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2477 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2478 if (!rank) { 2479 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2480 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2481 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2482 } 2483 2484 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2485 M = header[1]; N = header[2]; 2486 /* determine ownership of all rows */ 2487 m = M/size + ((M % size) > rank); 2488 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2489 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2490 2491 /* First process needs enough room for process with most rows */ 2492 if (!rank) { 2493 mmax = rowners[1]; 2494 for (i=2; i<size; i++) { 2495 mmax = PetscMax(mmax,rowners[i]); 2496 } 2497 } else mmax = m; 2498 2499 rowners[0] = 0; 2500 for (i=2; i<=size; i++) { 2501 rowners[i] += rowners[i-1]; 2502 } 2503 rstart = rowners[rank]; 2504 rend = rowners[rank+1]; 2505 2506 /* distribute row lengths to all processors */ 2507 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2508 if (!rank) { 2509 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2510 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2511 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2512 ierr = 
PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2513 for (j=0; j<m; j++) { 2514 procsnz[0] += ourlens[j]; 2515 } 2516 for (i=1; i<size; i++) { 2517 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2518 /* calculate the number of nonzeros on each processor */ 2519 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2520 procsnz[i] += rowlengths[j]; 2521 } 2522 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2523 } 2524 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2525 } else { 2526 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2527 } 2528 2529 if (!rank) { 2530 /* determine max buffer needed and allocate it */ 2531 maxnz = 0; 2532 for (i=0; i<size; i++) { 2533 maxnz = PetscMax(maxnz,procsnz[i]); 2534 } 2535 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2536 2537 /* read in my part of the matrix column indices */ 2538 nz = procsnz[0]; 2539 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2540 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2541 2542 /* read in every one elses and ship off */ 2543 for (i=1; i<size; i++) { 2544 nz = procsnz[i]; 2545 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2546 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2547 } 2548 ierr = PetscFree(cols);CHKERRQ(ierr); 2549 } else { 2550 /* determine buffer space needed for message */ 2551 nz = 0; 2552 for (i=0; i<m; i++) { 2553 nz += ourlens[i]; 2554 } 2555 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2556 2557 /* receive message of column indices*/ 2558 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2559 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2560 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2561 } 2562 2563 /* determine column ownership if matrix is not square */ 2564 if (N != M) { 2565 n = N/size + ((N % size) > rank); 2566 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2567 cstart = cend - n; 2568 } else { 2569 cstart = rstart; 2570 cend = rend; 2571 n = cend - cstart; 2572 } 2573 2574 /* loop over local rows, determining number of off diagonal entries */ 2575 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2576 jj = 0; 2577 for (i=0; i<m; i++) { 2578 for (j=0; j<ourlens[i]; j++) { 2579 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2580 jj++; 2581 } 2582 } 2583 2584 /* create our matrix */ 2585 for (i=0; i<m; i++) { 2586 ourlens[i] -= offlens[i]; 2587 } 2588 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2589 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2590 ierr = MatSetType(A,type);CHKERRQ(ierr); 2591 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2592 2593 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2594 for (i=0; i<m; i++) { 2595 ourlens[i] += offlens[i]; 2596 } 2597 2598 if (!rank) { 2599 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2600 2601 /* read in my part of the matrix numerical values */ 2602 nz = procsnz[0]; 2603 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2604 2605 /* insert into matrix */ 2606 jj = rstart; 2607 smycols = mycols; 2608 svals = vals; 2609 for (i=0; i<m; i++) { 2610 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2611 smycols += ourlens[i]; 2612 svals += ourlens[i]; 2613 jj++; 2614 } 2615 2616 /* read in other processors and ship out */ 2617 for 
(i=1; i<size; i++) { 2618 nz = procsnz[i]; 2619 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2620 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2621 } 2622 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2623 } else { 2624 /* receive numeric values */ 2625 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2626 2627 /* receive message of values*/ 2628 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2629 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2630 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2631 2632 /* insert into matrix */ 2633 jj = rstart; 2634 smycols = mycols; 2635 svals = vals; 2636 for (i=0; i<m; i++) { 2637 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2638 smycols += ourlens[i]; 2639 svals += ourlens[i]; 2640 jj++; 2641 } 2642 } 2643 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2644 ierr = PetscFree(vals);CHKERRQ(ierr); 2645 ierr = PetscFree(mycols);CHKERRQ(ierr); 2646 ierr = PetscFree(rowners);CHKERRQ(ierr); 2647 2648 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 *newmat = A; 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #undef __FUNCT__ 2655 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2656 /* 2657 Not great since it makes two copies of the submatrix, first an SeqAIJ 2658 in local and then by concatenating the local matrices the end result. 2659 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2660 */ 2661 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2662 { 2663 PetscErrorCode ierr; 2664 PetscMPIInt rank,size; 2665 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2666 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2667 Mat *local,M,Mreuse; 2668 PetscScalar *vwork,*aa; 2669 MPI_Comm comm = mat->comm; 2670 Mat_SeqAIJ *aij; 2671 2672 2673 PetscFunctionBegin; 2674 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2675 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2676 2677 if (call == MAT_REUSE_MATRIX) { 2678 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2679 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2680 local = &Mreuse; 2681 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2682 } else { 2683 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2684 Mreuse = *local; 2685 ierr = PetscFree(local);CHKERRQ(ierr); 2686 } 2687 2688 /* 2689 m - number of local rows 2690 n - number of columns (same on all processors) 2691 rstart - first row in new global matrix generated 2692 */ 2693 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2694 if (call == MAT_INITIAL_MATRIX) { 2695 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2696 ii = aij->i; 2697 jj = aij->j; 2698 2699 /* 2700 Determine the number of non-zeros in the diagonal and off-diagonal 2701 portions of the matrix in order to do correct preallocation 2702 */ 2703 2704 /* first get start and end of "diagonal" columns */ 2705 if (csize == PETSC_DECIDE) { 2706 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2707 if (mglobal == n) { /* square matrix */ 2708 nlocal = m; 2709 } else { 2710 nlocal = n/size + ((n % size) > rank); 2711 } 2712 } else { 2713 nlocal = csize; 2714 } 2715 ierr = 
MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2716 rstart = rend - nlocal; 2717 if (rank == size - 1 && rend != n) { 2718 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2719 } 2720 2721 /* next, compute all the lengths */ 2722 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2723 olens = dlens + m; 2724 for (i=0; i<m; i++) { 2725 jend = ii[i+1] - ii[i]; 2726 olen = 0; 2727 dlen = 0; 2728 for (j=0; j<jend; j++) { 2729 if (*jj < rstart || *jj >= rend) olen++; 2730 else dlen++; 2731 jj++; 2732 } 2733 olens[i] = olen; 2734 dlens[i] = dlen; 2735 } 2736 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2737 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2738 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2739 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2740 ierr = PetscFree(dlens);CHKERRQ(ierr); 2741 } else { 2742 PetscInt ml,nl; 2743 2744 M = *newmat; 2745 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2746 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2747 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2748 /* 2749 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2750 rather than the slower MatSetValues(). 2751 */ 2752 M->was_assembled = PETSC_TRUE; 2753 M->assembled = PETSC_FALSE; 2754 } 2755 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2756 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2757 ii = aij->i; 2758 jj = aij->j; 2759 aa = aij->a; 2760 for (i=0; i<m; i++) { 2761 row = rstart + i; 2762 nz = ii[i+1] - ii[i]; 2763 cwork = jj; jj += nz; 2764 vwork = aa; aa += nz; 2765 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2766 } 2767 2768 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2770 *newmat = M; 2771 2772 /* save submatrix used in processor for next request */ 2773 if (call == MAT_INITIAL_MATRIX) { 2774 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2775 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2776 } 2777 2778 PetscFunctionReturn(0); 2779 } 2780 2781 EXTERN_C_BEGIN 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2784 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2785 { 2786 PetscInt m,cstart, cend,j,nnz,i,d; 2787 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2788 const PetscInt *JJ; 2789 PetscScalar *values; 2790 PetscErrorCode ierr; 2791 2792 PetscFunctionBegin; 2793 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2794 2795 B->rmap.bs = B->cmap.bs = 1; 2796 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2797 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2798 m = B->rmap.n; 2799 cstart = B->cmap.rstart; 2800 cend = B->cmap.rend; 2801 rstart = B->rmap.rstart; 2802 2803 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2804 o_nnz = d_nnz + m; 2805 2806 for (i=0; i<m; i++) { 2807 nnz = Ii[i+1]- Ii[i]; 2808 JJ = J + Ii[i]; 2809 nnz_max = PetscMax(nnz_max,nnz); 2810 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2811 for (j=0; j<nnz; j++) { 2812 if (*JJ >= cstart) break; 2813 JJ++; 2814 } 2815 d = 0; 2816 for (; j<nnz; j++) { 2817 if (*JJ++ >= cend) break; 2818 d++; 2819 } 2820 
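    /* d counts the entries of this row whose global column lies in [cstart,cend), i.e. the
       diagonal block; the remaining nnz-d entries of the row are off-diagonal */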
d_nnz[i] = d; 2821 o_nnz[i] = nnz - d; 2822 } 2823 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2824 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2825 2826 if (v) values = (PetscScalar*)v; 2827 else { 2828 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2829 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2830 } 2831 2832 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2833 for (i=0; i<m; i++) { 2834 ii = i + rstart; 2835 nnz = Ii[i+1]- Ii[i]; 2836 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2837 } 2838 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2841 2842 if (!v) { 2843 ierr = PetscFree(values);CHKERRQ(ierr); 2844 } 2845 PetscFunctionReturn(0); 2846 } 2847 EXTERN_C_END 2848 2849 #undef __FUNCT__ 2850 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2851 /*@ 2852 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2853 (the default parallel PETSc format). 2854 2855 Collective on MPI_Comm 2856 2857 Input Parameters: 2858 + B - the matrix 2859 . i - the indices into j for the start of each local row (starts with zero) 2860 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2861 - v - optional values in the matrix 2862 2863 Level: developer 2864 2865 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2866 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2867 2868 .keywords: matrix, aij, compressed row, sparse, parallel 2869 2870 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2871 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2872 @*/ 2873 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2874 { 2875 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2876 2877 PetscFunctionBegin; 2878 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2879 if (f) { 2880 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2881 } 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2887 /*@C 2888 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2889 (the default parallel PETSc format). For good matrix assembly performance 2890 the user should preallocate the matrix storage by setting the parameters 2891 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2892 performance can be increased by more than a factor of 50. 2893 2894 Collective on MPI_Comm 2895 2896 Input Parameters: 2897 + A - the matrix 2898 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2899 (same value is used for all local rows) 2900 . d_nnz - array containing the number of nonzeros in the various rows of the 2901 DIAGONAL portion of the local submatrix (possibly different for each row) 2902 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2903 The size of this array is equal to the number of local rows, i.e 'm'. 2904 You must leave room for the diagonal entry even if it is zero. 2905 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2906 submatrix (same value is used for all local rows). 2907 - o_nnz - array containing the number of nonzeros in the various rows of the 2908 OFF-DIAGONAL portion of the local submatrix (possibly different for 2909 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2910 structure. The size of this array is equal to the number 2911 of local rows, i.e. 'm'. 2912 2913 If the *_nnz parameter is given then the *_nz parameter is ignored. 2914 2915 The AIJ format (also called the Yale sparse matrix format or 2916 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2917 storage. The stored row and column indices begin with zero. See the users manual for details. 2918 2919 The parallel matrix is partitioned such that the first m0 rows belong to 2920 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2921 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2922 2923 The DIAGONAL portion of the local submatrix of a processor can be defined 2924 as the submatrix which is obtained by extracting the part corresponding 2925 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2926 first row that belongs to the processor, and r2 is the last row belonging 2927 to this processor. This is a square mxm matrix. The remaining portion 2928 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2929 2930 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2931 2932 Example usage: 2933 2934 Consider the following 8x8 matrix with 34 non-zero values, that is 2935 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2936 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2937 as follows: 2938 2939 .vb 2940 1 2 0 | 0 3 0 | 0 4 2941 Proc0 0 5 6 | 7 0 0 | 8 0 2942 9 0 10 | 11 0 0 | 12 0 2943 ------------------------------------- 2944 13 0 14 | 15 16 17 | 0 0 2945 Proc1 0 18 0 | 19 20 21 | 0 0 2946 0 0 0 | 22 23 0 | 24 0 2947 ------------------------------------- 2948 Proc2 25 26 27 | 0 0 28 | 29 0 2949 30 0 0 | 31 32 33 | 0 34 2950 .ve 2951 2952 This can be represented as a collection of submatrices as: 2953 2954 .vb 2955 A B C 2956 D E F 2957 G H I 2958 .ve 2959 2960 Where the submatrices A,B,C are owned by proc0, D,E,F are 2961 owned by proc1, G,H,I are owned by proc2. 2962 2963 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2964 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2965 The 'M','N' parameters are 8,8, and have the same values on all procs. 2966 2967 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2968 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2969 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2970 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2971 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2972 matrix, and [DF] as another SeqAIJ matrix. 2973 2974 When d_nz, o_nz parameters are specified, d_nz storage elements are 2975 allocated for every row of the local diagonal submatrix, and o_nz 2976 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2977 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2978 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2979 In this case, the values of d_nz,o_nz are: 2980 .vb 2981 proc0 : d_nz = 2, o_nz = 2 2982 proc1 : d_nz = 3, o_nz = 2 2983 proc2 : d_nz = 1, o_nz = 4 2984 .ve 2985 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2986 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2987 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2988 34 values. 2989 2990 When d_nnz, o_nnz parameters are specified, the storage is specified 2991 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2992 In the above case the values for d_nnz,o_nnz are: 2993 .vb 2994 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2995 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2996 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2997 .ve 2998 Here the space allocated is the sum of all the above values, i.e. 34, and 2999 hence pre-allocation is perfect. 3000 3001 Level: intermediate 3002 3003 .keywords: matrix, aij, compressed row, sparse, parallel 3004 3005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3006 MPIAIJ 3007 @*/ 3008 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3009 { 3010 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3011 3012 PetscFunctionBegin; 3013 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3014 if (f) { 3015 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3016 } 3017 PetscFunctionReturn(0); 3018 } 3019 3020 #undef __FUNCT__ 3021 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3022 /*@C 3023 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3024 CSR format. 3025 3026 Collective on MPI_Comm 3027 3028 Input Parameters: 3029 + comm - MPI communicator 3030 . m - number of local rows (Cannot be PETSC_DECIDE) 3031 . n - This value should be the same as the local size used in creating the 3032 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3033 calculated if N is given) For square matrices n is almost always m. 3034 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3035 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3036 . i - row indices 3037 . j - column indices 3038 - a - matrix values 3039 3040 Output Parameter: 3041 . mat - the matrix 3042 3043 Level: intermediate 3044 3045 Notes: 3046 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3047 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3048 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
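   As an illustration only (a sketch with made-up sizes and values, not taken from a PETSc example),
   the process owning global rows 0 and 1 of a 4x4 matrix distributed over two processes could call
.vb
      PetscInt    ii[3] = {0,2,4};
      PetscInt    jj[4] = {0,1,1,2};
      PetscScalar aa[4] = {1.0,2.0,3.0,4.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,2,4,4,ii,jj,aa,&A);CHKERRQ(ierr);
.ve
   with the other process supplying the arrays for its own two rows in the same layout.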
3049 3050 The i and j indices are 0 based 3051 3052 .keywords: matrix, aij, compressed row, sparse, parallel 3053 3054 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3055 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3056 @*/ 3057 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3058 { 3059 PetscErrorCode ierr; 3060 3061 PetscFunctionBegin; 3062 if (i[0]) { 3063 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3064 } 3065 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3066 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3067 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3068 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3069 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatCreateMPIAIJ" 3075 /*@C 3076 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3077 (the default parallel PETSc format). For good matrix assembly performance 3078 the user should preallocate the matrix storage by setting the parameters 3079 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3080 performance can be increased by more than a factor of 50. 3081 3082 Collective on MPI_Comm 3083 3084 Input Parameters: 3085 + comm - MPI communicator 3086 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3087 This value should be the same as the local size used in creating the 3088 y vector for the matrix-vector product y = Ax. 3089 . n - This value should be the same as the local size used in creating the 3090 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3091 calculated if N is given) For square matrices n is almost always m. 3092 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3093 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3094 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3095 (same value is used for all local rows) 3096 . d_nnz - array containing the number of nonzeros in the various rows of the 3097 DIAGONAL portion of the local submatrix (possibly different for each row) 3098 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3099 The size of this array is equal to the number of local rows, i.e 'm'. 3100 You must leave room for the diagonal entry even if it is zero. 3101 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3102 submatrix (same value is used for all local rows). 3103 - o_nnz - array containing the number of nonzeros in the various rows of the 3104 OFF-DIAGONAL portion of the local submatrix (possibly different for 3105 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3106 structure. The size of this array is equal to the number 3107 of local rows, i.e 'm'. 3108 3109 Output Parameter: 3110 . A - the matrix 3111 3112 Notes: 3113 If the *_nnz parameter is given then the *_nz parameter is ignored 3114 3115 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3116 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3117 storage requirements for this matrix. 
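   A schematic call, with placeholder preallocation figures, might look like
.vb
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve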
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the first n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   owned by the given processor, i.e., the diagonal submatrix on
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining [m x (N-n)] portion of the local submatrix
   constitutes the OFF-DIAGONAL portion. The example below
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.
   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are the
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every process. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm
3375 3376 Input Parameters: 3377 + comm - the communicators the parallel matrix will live on 3378 . inmat - the input sequential matrices 3379 . n - number of local columns (or PETSC_DECIDE) 3380 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3381 3382 Output Parameter: 3383 . outmat - the parallel matrix generated 3384 3385 Level: advanced 3386 3387 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3388 3389 @*/ 3390 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3391 { 3392 PetscErrorCode ierr; 3393 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3394 PetscInt *indx; 3395 PetscScalar *values; 3396 3397 PetscFunctionBegin; 3398 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3399 if (scall == MAT_INITIAL_MATRIX){ 3400 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3401 if (n == PETSC_DECIDE){ 3402 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3403 } 3404 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3405 rstart -= m; 3406 3407 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3408 for (i=0;i<m;i++) { 3409 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3410 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3411 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3412 } 3413 /* This routine will ONLY return MPIAIJ type matrix */ 3414 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3415 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3416 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3417 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3418 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3419 3420 } else if (scall == MAT_REUSE_MATRIX){ 3421 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3422 } else { 3423 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3424 } 3425 3426 for (i=0;i<m;i++) { 3427 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3428 Ii = i + rstart; 3429 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3430 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3431 } 3432 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3433 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3434 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3435 3436 PetscFunctionReturn(0); 3437 } 3438 3439 #undef __FUNCT__ 3440 #define __FUNCT__ "MatFileSplit" 3441 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3442 { 3443 PetscErrorCode ierr; 3444 PetscMPIInt rank; 3445 PetscInt m,N,i,rstart,nnz; 3446 size_t len; 3447 const PetscInt *indx; 3448 PetscViewer out; 3449 char *name; 3450 Mat B; 3451 const PetscScalar *values; 3452 3453 PetscFunctionBegin; 3454 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3455 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3456 /* Should this be the type of the diagonal block of A? 
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only when it was actually obtained from the container; freeing it
       unconditionally would free an uninitialized pointer when no container is attached */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "src/mat/utils/freespace.h"
#include "petscbt.h"
static PetscEvent logkey_seqstompinum = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed.
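     A typical calling sequence is sketched below (not taken from the PETSc examples; the
     symbolic routine creates and preallocates the parallel matrix, and the numeric routine
     then sums the values into it). The same two steps are performed by the driver
     MatMerge_SeqsToMPI() when it is called with MAT_INITIAL_MATRIX.
.vb
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve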
Call PetscObjectQuery() to access seqmat. 3543 @*/ 3544 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3545 { 3546 PetscErrorCode ierr; 3547 MPI_Comm comm=mpimat->comm; 3548 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3549 PetscMPIInt size,rank,taga,*len_s; 3550 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3551 PetscInt proc,m; 3552 PetscInt **buf_ri,**buf_rj; 3553 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3554 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3555 MPI_Request *s_waits,*r_waits; 3556 MPI_Status *status; 3557 MatScalar *aa=a->a,**abuf_r,*ba_i; 3558 Mat_Merge_SeqsToMPI *merge; 3559 PetscContainer container; 3560 3561 PetscFunctionBegin; 3562 if (!logkey_seqstompinum) { 3563 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3564 } 3565 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3566 3567 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3568 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3569 3570 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3571 if (container) { 3572 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3573 } 3574 bi = merge->bi; 3575 bj = merge->bj; 3576 buf_ri = merge->buf_ri; 3577 buf_rj = merge->buf_rj; 3578 3579 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3580 owners = merge->rowmap.range; 3581 len_s = merge->len_s; 3582 3583 /* send and recv matrix values */ 3584 /*-----------------------------*/ 3585 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3586 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3587 3588 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3589 for (proc=0,k=0; proc<size; proc++){ 3590 if (!len_s[proc]) continue; 3591 i = owners[proc]; 3592 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3593 k++; 3594 } 3595 3596 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3597 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3598 ierr = PetscFree(status);CHKERRQ(ierr); 3599 3600 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3601 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3602 3603 /* insert mat values of mpimat */ 3604 /*----------------------------*/ 3605 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3606 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3607 nextrow = buf_ri_k + merge->nrecv; 3608 nextai = nextrow + merge->nrecv; 3609 3610 for (k=0; k<merge->nrecv; k++){ 3611 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3612 nrows = *(buf_ri_k[k]); 3613 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3614 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3615 } 3616 3617 /* set values of ba */ 3618 m = merge->rowmap.n; 3619 for (i=0; i<m; i++) { 3620 arow = owners[rank] + i; 3621 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3622 bnzi = bi[i+1] - bi[i]; 3623 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3624 3625 /* add local non-zero vals of this proc's seqmat into ba */ 3626 anzi = ai[arow+1] - ai[arow]; 3627 aj = a->j + ai[arow]; 3628 aa = a->a + ai[arow]; 3629 nextaj = 0; 3630 for (j=0; nextaj<anzi; j++){ 3631 if 
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3632 ba_i[j] += aa[nextaj++]; 3633 } 3634 } 3635 3636 /* add received vals into ba */ 3637 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3638 /* i-th row */ 3639 if (i == *nextrow[k]) { 3640 anzi = *(nextai[k]+1) - *nextai[k]; 3641 aj = buf_rj[k] + *(nextai[k]); 3642 aa = abuf_r[k] + *(nextai[k]); 3643 nextaj = 0; 3644 for (j=0; nextaj<anzi; j++){ 3645 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3646 ba_i[j] += aa[nextaj++]; 3647 } 3648 } 3649 nextrow[k]++; nextai[k]++; 3650 } 3651 } 3652 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3653 } 3654 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 3657 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3658 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3659 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3660 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3661 PetscFunctionReturn(0); 3662 } 3663 3664 static PetscEvent logkey_seqstompisym = 0; 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3668 { 3669 PetscErrorCode ierr; 3670 Mat B_mpi; 3671 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3672 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3673 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3674 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3675 PetscInt len,proc,*dnz,*onz; 3676 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3677 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3678 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3679 MPI_Status *status; 3680 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3681 PetscBT lnkbt; 3682 Mat_Merge_SeqsToMPI *merge; 3683 PetscContainer container; 3684 3685 PetscFunctionBegin; 3686 if (!logkey_seqstompisym) { 3687 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3688 } 3689 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3690 3691 /* make sure it is a PETSc comm */ 3692 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3693 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3694 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3695 3696 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3697 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3698 3699 /* determine row ownership */ 3700 /*---------------------------------------------------------*/ 3701 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3702 merge->rowmap.n = m; 3703 merge->rowmap.N = M; 3704 merge->rowmap.bs = 1; 3705 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3706 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3707 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3708 3709 m = merge->rowmap.n; 3710 M = merge->rowmap.N; 3711 owners = merge->rowmap.range; 3712 3713 /* determine the number of messages to send, their lengths */ 3714 /*---------------------------------------------------------*/ 3715 len_s = merge->len_s; 3716 3717 len = 0; /* length of buf_si[] */ 3718 merge->nsend = 0; 3719 for (proc=0; proc<size; proc++){ 3720 len_si[proc] = 0; 3721 if (proc == rank){ 3722 len_s[proc] = 0; 3723 } else { 3724 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3725 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3726 } 3727 if (len_s[proc]) { 3728 merge->nsend++; 3729 nrows = 0; 3730 for (i=owners[proc]; i<owners[proc+1]; i++){ 3731 if (ai[i+1] > ai[i]) nrows++; 3732 } 3733 len_si[proc] = 2*(nrows+1); 3734 len += len_si[proc]; 3735 } 3736 } 3737 3738 /* determine the number and length of messages to receive for ij-structure */ 3739 /*-------------------------------------------------------------------------*/ 3740 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3741 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3742 3743 /* post the Irecv of j-structure */ 3744 /*-------------------------------*/ 3745 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3746 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3747 3748 /* post the Isend of j-structure */ 3749 /*--------------------------------*/ 3750 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3751 sj_waits = si_waits + merge->nsend; 3752 3753 for (proc=0, k=0; proc<size; proc++){ 3754 if (!len_s[proc]) continue; 3755 i = owners[proc]; 3756 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3757 k++; 3758 } 3759 3760 /* receives and sends of j-structure are complete */ 3761 /*------------------------------------------------*/ 3762 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3763 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3764 3765 /* send and recv i-structure */ 3766 /*---------------------------*/ 3767 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3768 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3769 3770 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3771 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3772 for (proc=0,k=0; proc<size; proc++){ 3773 if (!len_s[proc]) continue; 3774 /* form outgoing message for i-structure: 3775 buf_si[0]: nrows to be sent 3776 [1:nrows]: row index (global) 3777 [nrows+1:2*nrows+1]: i-structure index 3778 */ 3779 /*-------------------------------------------*/ 3780 nrows = len_si[proc]/2 - 1; 3781 buf_si_i = buf_si + nrows+1; 3782 buf_si[0] = nrows; 3783 buf_si_i[0] = 0; 3784 nrows = 0; 3785 for (i=owners[proc]; i<owners[proc+1]; i++){ 3786 anzi = ai[i+1] - ai[i]; 3787 if (anzi) { 3788 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3789 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3790 nrows++; 3791 } 3792 } 3793 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3794 k++; 3795 buf_si += len_si[proc]; 3796 } 3797 3798 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3799 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3800 3801 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3802 for (i=0; i<merge->nrecv; i++){ 3803 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3804 } 3805 3806 ierr = PetscFree(len_si);CHKERRQ(ierr); 3807 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3808 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3809 ierr = 
PetscFree(si_waits);CHKERRQ(ierr); 3810 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3811 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3812 ierr = PetscFree(status);CHKERRQ(ierr); 3813 3814 /* compute a local seq matrix in each processor */ 3815 /*----------------------------------------------*/ 3816 /* allocate bi array and free space for accumulating nonzero column info */ 3817 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3818 bi[0] = 0; 3819 3820 /* create and initialize a linked list */ 3821 nlnk = N+1; 3822 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3823 3824 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3825 len = 0; 3826 len = ai[owners[rank+1]] - ai[owners[rank]]; 3827 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3828 current_space = free_space; 3829 3830 /* determine symbolic info for each local row */ 3831 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3832 nextrow = buf_ri_k + merge->nrecv; 3833 nextai = nextrow + merge->nrecv; 3834 for (k=0; k<merge->nrecv; k++){ 3835 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3836 nrows = *buf_ri_k[k]; 3837 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3838 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3839 } 3840 3841 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3842 len = 0; 3843 for (i=0;i<m;i++) { 3844 bnzi = 0; 3845 /* add local non-zero cols of this proc's seqmat into lnk */ 3846 arow = owners[rank] + i; 3847 anzi = ai[arow+1] - ai[arow]; 3848 aj = a->j + ai[arow]; 3849 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3850 bnzi += nlnk; 3851 /* add received col data into lnk */ 3852 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3853 if (i == *nextrow[k]) { /* i-th row */ 3854 anzi = *(nextai[k]+1) - *nextai[k]; 3855 aj = buf_rj[k] + *nextai[k]; 3856 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3857 bnzi += nlnk; 3858 nextrow[k]++; nextai[k]++; 3859 } 3860 } 3861 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3862 3863 /* if free space is not available, make more free space */ 3864 if (current_space->local_remaining<bnzi) { 3865 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3866 nspacedouble++; 3867 } 3868 /* copy data into free space, then initialize lnk */ 3869 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3870 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3871 3872 current_space->array += bnzi; 3873 current_space->local_used += bnzi; 3874 current_space->local_remaining -= bnzi; 3875 3876 bi[i+1] = bi[i] + bnzi; 3877 } 3878 3879 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3880 3881 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3882 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3883 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3884 3885 /* create symbolic parallel matrix B_mpi */ 3886 /*---------------------------------------*/ 3887 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3888 if (n==PETSC_DECIDE) { 3889 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3890 } else { 3891 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3892 } 3893 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3894 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3895 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3896 3897 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3898 B_mpi->assembled = PETSC_FALSE; 3899 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3900 merge->bi = bi; 3901 merge->bj = bj; 3902 merge->buf_ri = buf_ri; 3903 merge->buf_rj = buf_rj; 3904 merge->coi = PETSC_NULL; 3905 merge->coj = PETSC_NULL; 3906 merge->owners_co = PETSC_NULL; 3907 3908 /* attach the supporting struct to B_mpi for reuse */ 3909 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3910 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3911 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3912 *mpimat = B_mpi; 3913 3914 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3915 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3916 PetscFunctionReturn(0); 3917 } 3918 3919 static PetscEvent logkey_seqstompi = 0; 3920 #undef __FUNCT__ 3921 #define __FUNCT__ "MatMerge_SeqsToMPI" 3922 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3923 { 3924 PetscErrorCode ierr; 3925 3926 PetscFunctionBegin; 3927 if (!logkey_seqstompi) { 3928 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3929 } 3930 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3931 if (scall == MAT_INITIAL_MATRIX){ 3932 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3933 } 3934 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3935 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3936 PetscFunctionReturn(0); 3937 } 3938 static PetscEvent logkey_getlocalmat = 0; 3939 #undef __FUNCT__ 3940 #define __FUNCT__ "MatGetLocalMat" 3941 /*@C 3942 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3943 3944 Not Collective 3945 3946 Input Parameters: 3947 + A - the matrix 3948 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3949 3950 Output Parameter: 3951 . 
A_loc - the local sequential matrix generated 3952 3953 Level: developer 3954 3955 @*/ 3956 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3957 { 3958 PetscErrorCode ierr; 3959 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3960 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3961 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3962 PetscScalar *aa=a->a,*ba=b->a,*ca; 3963 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3964 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3965 3966 PetscFunctionBegin; 3967 if (!logkey_getlocalmat) { 3968 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3969 } 3970 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3971 if (scall == MAT_INITIAL_MATRIX){ 3972 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3973 ci[0] = 0; 3974 for (i=0; i<am; i++){ 3975 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3976 } 3977 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3978 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3979 k = 0; 3980 for (i=0; i<am; i++) { 3981 ncols_o = bi[i+1] - bi[i]; 3982 ncols_d = ai[i+1] - ai[i]; 3983 /* off-diagonal portion of A */ 3984 for (jo=0; jo<ncols_o; jo++) { 3985 col = cmap[*bj]; 3986 if (col >= cstart) break; 3987 cj[k] = col; bj++; 3988 ca[k++] = *ba++; 3989 } 3990 /* diagonal portion of A */ 3991 for (j=0; j<ncols_d; j++) { 3992 cj[k] = cstart + *aj++; 3993 ca[k++] = *aa++; 3994 } 3995 /* off-diagonal portion of A */ 3996 for (j=jo; j<ncols_o; j++) { 3997 cj[k] = cmap[*bj++]; 3998 ca[k++] = *ba++; 3999 } 4000 } 4001 /* put together the new matrix */ 4002 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4003 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4004 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4005 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4006 mat->free_a = PETSC_TRUE; 4007 mat->free_ij = PETSC_TRUE; 4008 mat->nonew = 0; 4009 } else if (scall == MAT_REUSE_MATRIX){ 4010 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4011 ci = mat->i; cj = mat->j; ca = mat->a; 4012 for (i=0; i<am; i++) { 4013 /* off-diagonal portion of A */ 4014 ncols_o = bi[i+1] - bi[i]; 4015 for (jo=0; jo<ncols_o; jo++) { 4016 col = cmap[*bj]; 4017 if (col >= cstart) break; 4018 *ca++ = *ba++; bj++; 4019 } 4020 /* diagonal portion of A */ 4021 ncols_d = ai[i+1] - ai[i]; 4022 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4023 /* off-diagonal portion of A */ 4024 for (j=jo; j<ncols_o; j++) { 4025 *ca++ = *ba++; bj++; 4026 } 4027 } 4028 } else { 4029 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4030 } 4031 4032 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4033 PetscFunctionReturn(0); 4034 } 4035 4036 static PetscEvent logkey_getlocalmatcondensed = 0; 4037 #undef __FUNCT__ 4038 #define __FUNCT__ "MatGetLocalMatCondensed" 4039 /*@C 4040 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4041 4042 Not Collective 4043 4044 Input Parameters: 4045 + A - the matrix 4046 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4047 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4048 4049 Output Parameter: 4050 . 
A_loc - the local sequential matrix generated 4051 4052 Level: developer 4053 4054 @*/ 4055 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4056 { 4057 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4058 PetscErrorCode ierr; 4059 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4060 IS isrowa,iscola; 4061 Mat *aloc; 4062 4063 PetscFunctionBegin; 4064 if (!logkey_getlocalmatcondensed) { 4065 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4066 } 4067 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4068 if (!row){ 4069 start = A->rmap.rstart; end = A->rmap.rend; 4070 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4071 } else { 4072 isrowa = *row; 4073 } 4074 if (!col){ 4075 start = A->cmap.rstart; 4076 cmap = a->garray; 4077 nzA = a->A->cmap.n; 4078 nzB = a->B->cmap.n; 4079 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4080 ncols = 0; 4081 for (i=0; i<nzB; i++) { 4082 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4083 else break; 4084 } 4085 imark = i; 4086 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4087 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4088 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4089 ierr = PetscFree(idx);CHKERRQ(ierr); 4090 } else { 4091 iscola = *col; 4092 } 4093 if (scall != MAT_INITIAL_MATRIX){ 4094 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4095 aloc[0] = *A_loc; 4096 } 4097 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4098 *A_loc = aloc[0]; 4099 ierr = PetscFree(aloc);CHKERRQ(ierr); 4100 if (!row){ 4101 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4102 } 4103 if (!col){ 4104 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4105 } 4106 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4107 PetscFunctionReturn(0); 4108 } 4109 4110 static PetscEvent logkey_GetBrowsOfAcols = 0; 4111 #undef __FUNCT__ 4112 #define __FUNCT__ "MatGetBrowsOfAcols" 4113 /*@C 4114 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4115 4116 Collective on Mat 4117 4118 Input Parameters: 4119 + A,B - the matrices in mpiaij format 4120 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4121 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4122 4123 Output Parameter: 4124 + rowb, colb - index sets of rows and columns of B to extract 4125 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4126 - B_seq - the sequential matrix generated 4127 4128 Level: developer 4129 4130 @*/ 4131 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4132 { 4133 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4134 PetscErrorCode ierr; 4135 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4136 IS isrowb,iscolb; 4137 Mat *bseq; 4138 4139 PetscFunctionBegin; 4140 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4141 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4142 } 4143 if (!logkey_GetBrowsOfAcols) { 4144 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4145 } 4146 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4147 4148 if (scall == MAT_INITIAL_MATRIX){ 4149 start = A->cmap.rstart; 4150 cmap = a->garray; 4151 nzA = a->A->cmap.n; 4152 nzB = a->B->cmap.n; 4153 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4154 ncols = 0; 4155 for (i=0; i<nzB; i++) { /* row < local row index */ 4156 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4157 else break; 4158 } 4159 imark = i; 4160 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4161 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4162 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4163 ierr = PetscFree(idx);CHKERRQ(ierr); 4164 *brstart = imark; 4165 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4166 } else { 4167 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4168 isrowb = *rowb; iscolb = *colb; 4169 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4170 bseq[0] = *B_seq; 4171 } 4172 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4173 *B_seq = bseq[0]; 4174 ierr = PetscFree(bseq);CHKERRQ(ierr); 4175 if (!rowb){ 4176 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4177 } else { 4178 *rowb = isrowb; 4179 } 4180 if (!colb){ 4181 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4182 } else { 4183 *colb = iscolb; 4184 } 4185 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4186 PetscFunctionReturn(0); 4187 } 4188 4189 static PetscEvent logkey_GetBrowsOfAocols = 0; 4190 #undef __FUNCT__ 4191 #define __FUNCT__ "MatGetBrowsOfAoCols" 4192 /*@C 4193 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4194 of the OFF-DIAGONAL portion of local A 4195 4196 Collective on Mat 4197 4198 Input Parameters: 4199 + A,B - the matrices in mpiaij format 4200 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4201 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4202 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4203 4204 Output Parameter: 4205 + B_oth - the sequential matrix generated 4206 4207 Level: developer 4208 4209 @*/ 4210 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4211 { 4212 VecScatter_MPI_General *gen_to,*gen_from; 4213 PetscErrorCode ierr; 4214 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4215 Mat_SeqAIJ *b_oth; 4216 VecScatter ctx=a->Mvctx; 4217 MPI_Comm comm=ctx->comm; 4218 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4219 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4220 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4221 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4222 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4223 MPI_Status *sstatus,rstatus; 4224 PetscMPIInt jj; 4225 PetscInt *cols,sbs,rbs; 4226 PetscScalar *vals; 4227 4228 PetscFunctionBegin; 4229 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4230 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4231 } 4232 if (!logkey_GetBrowsOfAocols) { 4233 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4234 } 4235 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4236 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4237 4238 gen_to = (VecScatter_MPI_General*)ctx->todata; 4239 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4240 rvalues = gen_from->values; /* holds the length of receiving row */ 4241 svalues = gen_to->values; /* holds the length of sending row */ 4242 nrecvs = gen_from->n; 4243 nsends = gen_to->n; 4244 4245 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4246 srow = gen_to->indices; /* local row index to be sent */ 4247 sstarts = gen_to->starts; 4248 sprocs = gen_to->procs; 4249 sstatus = gen_to->sstatus; 4250 sbs = gen_to->bs; 4251 rstarts = gen_from->starts; 4252 rprocs = gen_from->procs; 4253 rbs = gen_from->bs; 4254 4255 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4256 if (scall == MAT_INITIAL_MATRIX){ 4257 /* i-array */ 4258 /*---------*/ 4259 /* post receives */ 4260 for (i=0; i<nrecvs; i++){ 4261 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4262 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4263 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4264 } 4265 4266 /* pack the outgoing message */ 4267 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4268 rstartsj = sstartsj + nsends +1; 4269 sstartsj[0] = 0; rstartsj[0] = 0; 4270 len = 0; /* total length of j or a array to be sent */ 4271 k = 0; 4272 for (i=0; i<nsends; i++){ 4273 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4274 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4275 for (j=0; j<nrows; j++) { 4276 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4277 for (l=0; l<sbs; l++){ 4278 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4279 rowlen[j*sbs+l] = ncols; 4280 len += ncols; 4281 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4282 } 4283 k++; 4284 } 4285 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4286 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4287 } 4288 /* recvs and sends of i-array are completed */ 4289 i = nrecvs; 4290 while (i--) { 4291 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4292 } 4293 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4294 4295 /* allocate buffers for sending j and a arrays */ 4296 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4297 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4298 4299 /* create i-array of B_oth */ 4300 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4301 b_othi[0] = 0; 4302 len = 0; /* total length of j or a array to be received */ 4303 k = 0; 4304 for (i=0; i<nrecvs; i++){ 4305 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4306 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4307 for (j=0; j<nrows; j++) { 4308 b_othi[k+1] = b_othi[k] + rowlen[j]; 4309 len += rowlen[j]; k++; 4310 } 4311 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4312 } 4313 4314 /* allocate space for j and a arrrays of B_oth */ 4315 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4316 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4317 4318 /* j-array */ 4319 /*---------*/ 4320 /* post receives of j-array */ 4321 for (i=0; i<nrecvs; i++){ 4322 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4323 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4324 } 4325 4326 /* pack the outgoing message j-array */ 4327 k = 0; 4328 for (i=0; i<nsends; i++){ 4329 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4330 bufJ = bufj+sstartsj[i]; 4331 for (j=0; j<nrows; j++) { 4332 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4333 for (ll=0; ll<sbs; ll++){ 4334 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4335 for (l=0; l<ncols; l++){ 4336 *bufJ++ = cols[l]; 4337 } 4338 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4339 } 4340 } 4341 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4342 } 4343 4344 /* recvs and sends of j-array are completed */ 4345 i = nrecvs; 4346 while (i--) { 4347 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4348 } 4349 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4350 } else if (scall == MAT_REUSE_MATRIX){ 4351 sstartsj = *startsj; 4352 rstartsj = sstartsj + nsends +1; 4353 bufa = *bufa_ptr; 4354 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4355 b_otha = b_oth->a; 4356 } else { 4357 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4358 } 4359 4360 /* a-array */ 4361 /*---------*/ 4362 /* post receives of a-array */ 4363 for (i=0; i<nrecvs; i++){ 4364 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4365 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4366 } 4367 4368 /* pack the outgoing message a-array */ 4369 k = 0; 4370 for (i=0; i<nsends; i++){ 4371 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4372 bufA = bufa+sstartsj[i]; 4373 for (j=0; j<nrows; j++) { 4374 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4375 for (ll=0; ll<sbs; ll++){ 4376 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4377 for (l=0; l<ncols; l++){ 4378 *bufA++ = vals[l]; 4379 } 4380 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4381 } 4382 } 4383 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4384 } 4385 /* recvs and sends of a-array are completed */ 4386 i = nrecvs; 4387 while (i--) { 4388 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4389 } 4390 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4391 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4392 4393 if (scall == MAT_INITIAL_MATRIX){ 4394 /* put together the new matrix */ 4395 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4396 4397 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4398 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4399 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4400 b_oth->free_a = PETSC_TRUE; 4401 b_oth->free_ij = PETSC_TRUE; 4402 b_oth->nonew = 0; 4403 4404 ierr = PetscFree(bufj);CHKERRQ(ierr); 4405 if (!startsj || !bufa_ptr){ 4406 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4407 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4408 } else { 4409 *startsj = sstartsj; 4410 *bufa_ptr = bufa; 4411 } 4412 } 4413 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4414 PetscFunctionReturn(0); 4415 } 4416 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatGetCommunicationStructs" 4419 /*@C 4420 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4421 4422 Not Collective 4423 4424 Input Parameters: 4425 . A - The matrix in mpiaij format 4426 4427 Output Parameter: 4428 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4429 . colmap - A map from global column index to local index into lvec 4430 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4431 4432 Level: developer 4433 4434 @*/ 4435 #if defined (PETSC_USE_CTABLE) 4436 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4437 #else 4438 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4439 #endif 4440 { 4441 Mat_MPIAIJ *a; 4442 4443 PetscFunctionBegin; 4444 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4445 PetscValidPointer(lvec, 2) 4446 PetscValidPointer(colmap, 3) 4447 PetscValidPointer(multScatter, 4) 4448 a = (Mat_MPIAIJ *) A->data; 4449 if (lvec) *lvec = a->lvec; 4450 if (colmap) *colmap = a->colmap; 4451 if (multScatter) *multScatter = a->Mvctx; 4452 PetscFunctionReturn(0); 4453 } 4454 4455 EXTERN_C_BEGIN 4456 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4457 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4458 EXTERN_C_END 4459 4460 /*MC 4461 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4462 4463 Options Database Keys: 4464 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4465 4466 Level: beginner 4467 4468 .seealso: MatCreateMPIAIJ 4469 M*/ 4470 4471 EXTERN_C_BEGIN 4472 #undef __FUNCT__ 4473 #define __FUNCT__ "MatCreate_MPIAIJ" 4474 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4475 { 4476 Mat_MPIAIJ *b; 4477 PetscErrorCode ierr; 4478 PetscMPIInt size; 4479 4480 PetscFunctionBegin; 4481 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4482 4483 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4484 B->data = (void*)b; 4485 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4486 B->factor = 0; 4487 B->rmap.bs = 1; 4488 B->assembled = PETSC_FALSE; 4489 B->mapping = 0; 4490 4491 B->insertmode = NOT_SET_VALUES; 4492 b->size = size; 4493 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4494 4495 /* build cache for off array entries formed */ 4496 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4497 b->donotstash = PETSC_FALSE; 4498 b->colmap = 0; 4499 b->garray = 0; 4500 b->roworiented = PETSC_TRUE; 4501 4502 /* stuff used for matrix vector multiply */ 4503 b->lvec = PETSC_NULL; 4504 b->Mvctx = PETSC_NULL; 4505 4506 /* stuff for MatGetRow() */ 4507 b->rowindices = 0; 4508 b->rowvalues = 0; 4509 b->getrowactive = PETSC_FALSE; 4510 4511 4512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4513 "MatStoreValues_MPIAIJ", 4514 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4516 "MatRetrieveValues_MPIAIJ", 4517 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4518 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4519 "MatGetDiagonalBlock_MPIAIJ", 4520 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4521 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4522 "MatIsTranspose_MPIAIJ", 4523 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4524 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4525 "MatMPIAIJSetPreallocation_MPIAIJ", 4526 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4527 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4528 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4529 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4530 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4531 "MatDiagonalScaleLocal_MPIAIJ", 4532 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4534 "MatConvert_MPIAIJ_MPICSRPERM", 4535 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4536 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4537 "MatConvert_MPIAIJ_MPICRL", 4538 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4539 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4540 PetscFunctionReturn(0); 4541 } 4542 EXTERN_C_END 4543 4544 #undef __FUNCT__ 4545 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4546 /*@C 4547 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4548 and "off-diagonal" part of the matrix in CSR format. 4549 4550 Collective on MPI_Comm 4551 4552 Input Parameters: 4553 + comm - MPI communicator 4554 . m - number of local rows (Cannot be PETSC_DECIDE) 4555 . n - This value should be the same as the local size used in creating the 4556 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4557 calculated if N is given) For square matrices n is almost always m. 4558 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4559 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4560 . i - row indices for "diagonal" portion of matrix 4561 . j - column indices 4562 . a - matrix values 4563 . oi - row indices for "off-diagonal" portion of matrix 4564 . oj - column indices 4565 - oa - matrix values 4566 4567 Output Parameter: 4568 . mat - the matrix 4569 4570 Level: advanced 4571 4572 Notes: 4573 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4574 4575 The i and j indices are 0 based 4576 4577 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4578 4579 4580 .keywords: matrix, aij, compressed row, sparse, parallel 4581 4582 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4583 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4584 @*/ 4585 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4586 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4587 { 4588 PetscErrorCode ierr; 4589 Mat_MPIAIJ *maij; 4590 4591 PetscFunctionBegin; 4592 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4593 if (i[0]) { 4594 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4595 } 4596 if (oi[0]) { 4597 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4598 } 4599 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4600 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4601 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4602 maij = (Mat_MPIAIJ*) (*mat)->data; 4603 maij->donotstash = PETSC_TRUE; 4604 (*mat)->preallocated = PETSC_TRUE; 4605 4606 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4607 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4608 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4609 4610 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4611 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4612 4613 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4614 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4615 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4616 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 4618 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4619 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 PetscFunctionReturn(0); 4621 } 4622 4623 /* 4624 Special version for direct calls from Fortran 4625 */ 4626 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4627 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4628 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4629 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4630 #endif 4631 4632 /* Change these macros so can be used in void function */ 4633 #undef CHKERRQ 4634 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4635 #undef SETERRQ2 4636 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4637 #undef SETERRQ 4638 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4639 4640 EXTERN_C_BEGIN 4641 #undef __FUNCT__ 4642 #define __FUNCT__ "matsetvaluesmpiaij_" 4643 void PETSC_STDCALL 
/*
    Special version of MatSetValues() for direct calls from Fortran (avoids the overhead of the generic Fortran stub)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function; errors abort instead of returning an error code */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned by this process; set up pointers into the diagonal (A) and off-diagonal (B) blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 ba must be refreshed before ap2 is recomputed since DisAssemble_MPIAIJ() replaced aij->B */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          /* the column belongs to the off-diagonal block */
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row is owned by another process: stash the values; they are delivered during MatAssemblyBegin/End() */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END
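
/*
   Editorial note (not part of the original source): the stash branch at the end of
   matsetvaluesmpiaij_() above (and of MatSetValues_MPIAIJ()) is what lets an application set
   entries in rows owned by other processes; such entries are buffered in mat->stash and moved
   to their owners during MatAssemblyBegin()/MatAssemblyEnd(). A minimal sketch of that
   application-level calling pattern (where CHKERRQ has its usual meaning), with made-up
   indices and values, guarded by #if 0 so it is never compiled:
*/
#if 0
static PetscErrorCode MatMPIAIJOffProcessSketch(Mat A)
{
  PetscInt       row     = 7;                 /* may be owned by another process */
  PetscInt       cols[3] = {5,6,7};
  PetscScalar    vals[3] = {-1.0,2.0,-1.0};
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(A,1,&row,3,cols,vals,ADD_VALUES);CHKERRQ(ierr);
  /* any stashed (off-process) entries are communicated here */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif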