1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = a->ignorezeroentries; 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = mat->cmap.N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->rmap.n; 767 range = mat->rmap.range; 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->rmap.n; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->rmap.n; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->rmap.n; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file */ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. */ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 943 if (!rank) { 944 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 945 } else { 946 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = mat->rmap.rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = mat->rmap.rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 987 } 988 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 989 ierr = MatDestroy(A);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatView_MPIAIJ" 996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 997 { 998 PetscErrorCode ierr; 999 PetscTruth iascii,isdraw,issocket,isbinary; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1006 if (iascii || isdraw || isbinary || issocket) { 1007 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1008 } else { 1009 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NEW_NONZERO_LOCATIONS: 1207 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1208 case MAT_KEEP_ZEROED_ROWS: 1209 case MAT_NEW_NONZERO_LOCATION_ERR: 1210 case MAT_USE_INODES: 1211 case MAT_IGNORE_ZERO_ENTRIES: 1212 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1213 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1214 break; 1215 case MAT_ROW_ORIENTED: 1216 a->roworiented = flg; 1217 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1219 break; 1220 case MAT_NEW_DIAGONALS: 1221 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1222 break; 1223 case MAT_IGNORE_OFF_PROC_ENTRIES: 1224 a->donotstash = PETSC_TRUE; 1225 break; 1226 case MAT_SYMMETRIC: 1227 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1228 break; 1229 case MAT_STRUCTURALLY_SYMMETRIC: 1230 case MAT_HERMITIAN: 1231 case MAT_SYMMETRY_ETERNAL: 1232 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1233 break; 1234 default: 1235 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1236 } 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatGetRow_MPIAIJ" 1242 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1243 { 1244 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1245 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1246 PetscErrorCode ierr; 1247 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1248 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1249 PetscInt *cmap,*idx_p; 1250 1251 PetscFunctionBegin; 1252 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1253 mat->getrowactive = PETSC_TRUE; 1254 1255 if (!mat->rowvalues && (idx || v)) { 1256 /* 1257 allocate enough space to hold information from the longest row. 1258 */ 1259 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1260 PetscInt max = 1,tmp; 1261 for (i=0; i<matin->rmap.n; i++) { 1262 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1263 if (max < tmp) { max = tmp; } 1264 } 1265 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1266 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1267 } 1268 1269 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1270 lrow = row - rstart; 1271 1272 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1273 if (!v) {pvA = 0; pvB = 0;} 1274 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1275 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1276 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1277 nztot = nzA + nzB; 1278 1279 cmap = mat->garray; 1280 if (v || idx) { 1281 if (nztot) { 1282 /* Sort by increasing column numbers, assuming A and B already sorted */ 1283 PetscInt imark = -1; 1284 if (v) { 1285 *v = v_p = mat->rowvalues; 1286 for (i=0; i<nzB; i++) { 1287 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1288 else break; 1289 } 1290 imark = i; 1291 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1292 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1293 } 1294 if (idx) { 1295 *idx = idx_p = mat->rowindices; 1296 if (imark > -1) { 1297 for (i=0; i<imark; i++) { 1298 idx_p[i] = cmap[cworkB[i]]; 1299 } 1300 } else { 1301 for (i=0; i<nzB; i++) { 1302 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1303 else break; 1304 } 1305 imark = i; 1306 } 1307 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1308 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1309 } 1310 } else { 1311 if (idx) *idx = 0; 1312 if (v) *v = 0; 1313 } 1314 } 1315 *nz = nztot; 1316 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1317 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1323 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1324 { 1325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1326 1327 PetscFunctionBegin; 1328 if (!aij->getrowactive) { 1329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1330 } 1331 aij->getrowactive = PETSC_FALSE; 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatNorm_MPIAIJ" 1337 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1338 { 1339 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1340 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1341 PetscErrorCode ierr; 1342 PetscInt i,j,cstart = mat->cmap.rstart; 1343 PetscReal sum = 0.0; 1344 PetscScalar *v; 1345 1346 PetscFunctionBegin; 1347 if (aij->size == 1) { 1348 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1349 } else { 1350 if (type == NORM_FROBENIUS) { 1351 v = amat->a; 1352 for (i=0; i<amat->nz; i++) { 1353 #if defined(PETSC_USE_COMPLEX) 1354 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1355 #else 1356 sum += (*v)*(*v); v++; 1357 #endif 1358 } 1359 v = bmat->a; 1360 for (i=0; i<bmat->nz; i++) { 1361 #if defined(PETSC_USE_COMPLEX) 1362 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1363 #else 1364 sum += (*v)*(*v); v++; 1365 #endif 1366 } 1367 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1368 *norm = sqrt(*norm); 1369 } else if (type == NORM_1) { /* max column norm */ 1370 PetscReal *tmp,*tmp2; 1371 PetscInt *jj,*garray = aij->garray; 1372 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1373 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1374 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1375 *norm = 0.0; 1376 v = amat->a; jj = amat->j; 1377 for (j=0; j<amat->nz; j++) { 1378 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1379 } 1380 v = bmat->a; jj = bmat->j; 1381 for (j=0; j<bmat->nz; j++) { 1382 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1383 } 1384 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1385 for (j=0; j<mat->cmap.N; j++) { 1386 if (tmp2[j] > *norm) *norm = tmp2[j]; 1387 } 1388 ierr = PetscFree(tmp);CHKERRQ(ierr); 1389 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1390 } else if (type == NORM_INFINITY) { /* max row norm */ 1391 PetscReal ntemp = 0.0; 1392 for (j=0; j<aij->A->rmap.n; j++) { 1393 v = amat->a + amat->i[j]; 1394 sum = 0.0; 1395 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1396 sum += PetscAbsScalar(*v); v++; 1397 } 1398 v = bmat->a + bmat->i[j]; 1399 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1400 sum += PetscAbsScalar(*v); v++; 1401 } 1402 if (sum > ntemp) ntemp = sum; 1403 } 1404 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1405 } else { 1406 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1407 } 1408 } 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatTranspose_MPIAIJ" 1414 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1415 { 1416 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1417 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1418 PetscErrorCode ierr; 1419 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1420 PetscInt cstart=A->cmap.rstart,ncol; 1421 Mat B; 1422 PetscScalar *array; 1423 1424 PetscFunctionBegin; 1425 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1426 1427 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1428 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1429 ai = Aloc->i; aj = Aloc->j; 1430 bi = Bloc->i; bj = Bloc->j; 1431 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1432 cols = d_nnz + na + 1; /* work space to be used by B part */ 1433 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1434 for (i=0; i<ai[ma]; i++){ 1435 d_nnz[aj[i]] ++; 1436 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1437 } 1438 1439 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1440 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1441 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1442 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1443 1444 /* copy over the A part */ 1445 array = Aloc->a; 1446 row = A->rmap.rstart; 1447 for (i=0; i<ma; i++) { 1448 ncol = ai[i+1]-ai[i]; 1449 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1450 row++; array += ncol; aj += ncol; 1451 } 1452 aj = Aloc->j; 1453 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1454 1455 /* copy over the B part */ 1456 array = Bloc->a; 1457 row = A->rmap.rstart; 1458 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1459 for (i=0; i<mb; i++) { 1460 ncol = bi[i+1]-bi[i]; 1461 ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1462 row++; array += ncol; cols += ncol; 1463 } 1464 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1465 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1466 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1467 if (matout) { 1468 *matout = B; 1469 } else { 1470 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1477 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1478 { 1479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1480 Mat a = aij->A,b = aij->B; 1481 PetscErrorCode ierr; 1482 PetscInt s1,s2,s3; 1483 1484 PetscFunctionBegin; 1485 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1486 if (rr) { 1487 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1488 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1489 /* Overlap communication with computation. */ 1490 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 } 1492 if (ll) { 1493 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1494 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1495 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1496 } 1497 /* scale the diagonal block */ 1498 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1499 1500 if (rr) { 1501 /* Do a scatter end and then right scale the off-diagonal block */ 1502 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1504 } 1505 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1511 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1512 { 1513 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1514 PetscErrorCode ierr; 1515 1516 PetscFunctionBegin; 1517 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1518 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1523 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1524 { 1525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatEqual_MPIAIJ" 1535 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1536 { 1537 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1538 Mat a,b,c,d; 1539 PetscTruth flg; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 a = matA->A; b = matA->B; 1544 c = matB->A; d = matB->B; 1545 1546 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1547 if (flg) { 1548 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1549 } 1550 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatCopy_MPIAIJ" 1556 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1557 { 1558 PetscErrorCode ierr; 1559 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1560 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1561 1562 PetscFunctionBegin; 1563 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1564 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1565 /* because of the column compression in the off-processor part of the matrix a->B, 1566 the number of columns in a->B and b->B may be different, hence we cannot call 1567 the MatCopy() directly on the two parts. If need be, we can provide a more 1568 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1569 then copying the submatrices */ 1570 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1571 } else { 1572 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1573 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1580 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1581 { 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #include "petscblaslapack.h" 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatAXPY_MPIAIJ" 1592 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1593 { 1594 PetscErrorCode ierr; 1595 PetscInt i; 1596 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1597 PetscBLASInt bnz,one=1; 1598 Mat_SeqAIJ *x,*y; 1599 1600 PetscFunctionBegin; 1601 if (str == SAME_NONZERO_PATTERN) { 1602 PetscScalar alpha = a; 1603 x = (Mat_SeqAIJ *)xx->A->data; 1604 y = (Mat_SeqAIJ *)yy->A->data; 1605 bnz = (PetscBLASInt)x->nz; 1606 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1607 x = (Mat_SeqAIJ *)xx->B->data; 1608 y = (Mat_SeqAIJ *)yy->B->data; 1609 bnz = (PetscBLASInt)x->nz; 1610 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1611 } else if (str == SUBSET_NONZERO_PATTERN) { 1612 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1613 1614 x = (Mat_SeqAIJ *)xx->B->data; 1615 y = (Mat_SeqAIJ *)yy->B->data; 1616 if (y->xtoy && y->XtoY != xx->B) { 1617 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1618 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1619 } 1620 if (!y->xtoy) { /* get xtoy */ 1621 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1622 y->XtoY = xx->B; 1623 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1624 } 1625 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1626 } else { 1627 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatConjugate_MPIAIJ" 1636 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1637 { 1638 #if defined(PETSC_USE_COMPLEX) 1639 PetscErrorCode ierr; 1640 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1641 1642 PetscFunctionBegin; 1643 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1644 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1645 #else 1646 PetscFunctionBegin; 1647 #endif 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatRealPart_MPIAIJ" 1653 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1654 { 1655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1656 PetscErrorCode ierr; 1657 1658 PetscFunctionBegin; 1659 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1660 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1666 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1667 { 1668 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1669 PetscErrorCode ierr; 1670 1671 PetscFunctionBegin; 1672 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1673 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #ifdef PETSC_HAVE_PBGL 1678 1679 #include <boost/parallel/mpi/bsp_process_group.hpp> 1680 #include <boost/graph/distributed/ilu_default_graph.hpp> 1681 #include <boost/graph/distributed/ilu_0_block.hpp> 1682 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1683 #include <boost/graph/distributed/petsc/interface.hpp> 1684 #include <boost/multi_array.hpp> 1685 #include <boost/parallel/distributed_property_map.hpp> 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1689 /* 1690 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1691 */ 1692 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1693 { 1694 namespace petsc = boost::distributed::petsc; 1695 1696 namespace graph_dist = boost::graph::distributed; 1697 using boost::graph::distributed::ilu_default::process_group_type; 1698 using boost::graph::ilu_permuted; 1699 1700 PetscTruth row_identity, col_identity; 1701 PetscContainer c; 1702 PetscInt m, n, M, N; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1707 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1708 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1709 if (!row_identity || !col_identity) { 1710 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1711 } 1712 1713 process_group_type pg; 1714 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1715 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1716 lgraph_type& level_graph = *lgraph_p; 1717 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1718 1719 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1720 ilu_permuted(level_graph); 1721 1722 /* put together the new matrix */ 1723 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1724 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1725 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1726 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1727 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1728 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1729 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1730 (*fact)->factor = FACTOR_LU; 1731 1732 ierr = PetscContainerCreate(A->comm, &c); 1733 ierr = PetscContainerSetPointer(c, lgraph_p); 1734 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1740 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1741 { 1742 PetscFunctionBegin; 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatSolve_MPIAIJ" 1748 /* 1749 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1750 */ 1751 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1752 { 1753 namespace graph_dist = boost::graph::distributed; 1754 1755 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1756 lgraph_type* lgraph_p; 1757 PetscContainer c; 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1762 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1763 ierr = VecCopy(b, x); CHKERRQ(ierr); 1764 1765 PetscScalar* array_x; 1766 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1767 PetscInt sx; 1768 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1769 1770 PetscScalar* array_b; 1771 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1772 PetscInt sb; 1773 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1774 1775 lgraph_type& level_graph = *lgraph_p; 1776 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1777 1778 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1779 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1780 ref_x(array_x, boost::extents[num_vertices(graph)]); 1781 1782 typedef boost::iterator_property_map<array_ref_type::iterator, 1783 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1784 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1785 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1786 1787 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1788 1789 PetscFunctionReturn(0); 1790 } 1791 #endif 1792 1793 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1794 PetscInt nzlocal,nsends,nrecvs; 1795 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1796 PetscScalar *sbuf_a,**rbuf_a; 1797 PetscErrorCode (*MatDestroy)(Mat); 1798 } Mat_Redundant; 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1802 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1803 { 1804 PetscErrorCode ierr; 1805 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1806 PetscInt i; 1807 1808 PetscFunctionBegin; 1809 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1810 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1811 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1812 for (i=0; i<redund->nrecvs; i++){ 1813 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1814 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1815 } 1816 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1817 ierr = PetscFree(redund);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatDestroy_MatRedundant" 1823 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1824 { 1825 PetscErrorCode ierr; 1826 PetscContainer container; 1827 Mat_Redundant *redund=PETSC_NULL; 1828 1829 PetscFunctionBegin; 1830 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1831 if (container) { 1832 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1833 } else { 1834 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1835 } 1836 A->ops->destroy = redund->MatDestroy; 1837 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1838 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1839 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1845 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1846 { 1847 PetscMPIInt rank,size; 1848 MPI_Comm comm=mat->comm; 1849 PetscErrorCode ierr; 1850 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1851 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1852 PetscInt *rowrange=mat->rmap.range; 1853 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1854 Mat A=aij->A,B=aij->B,C=*matredundant; 1855 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1856 PetscScalar *sbuf_a; 1857 PetscInt nzlocal=a->nz+b->nz; 1858 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1859 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1860 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1861 PetscScalar *vals,*aworkA,*aworkB; 1862 PetscMPIInt tag1,tag2,tag3,imdex; 1863 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1864 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1865 MPI_Status recv_status,*send_status; 1866 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1867 PetscInt **rbuf_j=PETSC_NULL; 1868 PetscScalar **rbuf_a=PETSC_NULL; 1869 Mat_Redundant *redund=PETSC_NULL; 1870 PetscContainer container; 1871 1872 PetscFunctionBegin; 1873 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1874 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1875 1876 if (reuse == MAT_REUSE_MATRIX) { 1877 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1878 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1879 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1880 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1881 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1882 if (container) { 1883 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1884 } else { 1885 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1886 } 1887 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 1888 1889 nsends = redund->nsends; 1890 nrecvs = redund->nrecvs; 1891 send_rank = redund->send_rank; recv_rank = send_rank + size; 1892 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1893 sbuf_j = redund->sbuf_j; 1894 sbuf_a = redund->sbuf_a; 1895 rbuf_j = redund->rbuf_j; 1896 rbuf_a = redund->rbuf_a; 1897 } 1898 1899 if (reuse == MAT_INITIAL_MATRIX){ 1900 PetscMPIInt subrank,subsize; 1901 PetscInt nleftover,np_subcomm; 1902 /* get the destination processors' id send_rank, nsends and nrecvs */ 1903 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1904 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1905 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1906 recv_rank = send_rank + size; 1907 np_subcomm = size/nsubcomm; 1908 nleftover = size - nsubcomm*np_subcomm; 1909 nsends = 0; nrecvs = 0; 1910 for (i=0; i<size; i++){ /* i=rank*/ 1911 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1912 send_rank[nsends] = i; nsends++; 1913 recv_rank[nrecvs++] = i; 1914 } 1915 } 1916 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1917 i = size-nleftover-1; 1918 j = 0; 1919 while (j < nsubcomm - nleftover){ 1920 send_rank[nsends++] = i; 1921 i--; j++; 1922 } 1923 } 1924 1925 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1926 for (i=0; i<nleftover; i++){ 1927 recv_rank[nrecvs++] = size-nleftover+i; 1928 } 1929 } 1930 1931 /* allocate sbuf_j, sbuf_a */ 1932 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1933 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1934 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1935 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1936 1937 /* copy mat's local entries into the buffers */ 1938 if (reuse == MAT_INITIAL_MATRIX){ 1939 rownz_max = 0; 1940 rptr = sbuf_j; 1941 cols = sbuf_j + rend-rstart + 1; 1942 vals = sbuf_a; 1943 rptr[0] = 0; 1944 for (i=0; i<rend-rstart; i++){ 1945 row = i + rstart; 1946 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1947 ncols = nzA + nzB; 1948 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1949 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1950 /* load the column indices for this row into cols */ 1951 lwrite = 0; 1952 for (l=0; l<nzB; l++) { 1953 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1954 vals[lwrite] = aworkB[l]; 1955 cols[lwrite++] = ctmp; 1956 } 1957 } 1958 for (l=0; l<nzA; l++){ 1959 vals[lwrite] = aworkA[l]; 1960 cols[lwrite++] = cstart + cworkA[l]; 1961 } 1962 for (l=0; l<nzB; l++) { 1963 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1964 vals[lwrite] = aworkB[l]; 1965 cols[lwrite++] = ctmp; 1966 } 1967 } 1968 vals += ncols; 1969 cols += ncols; 1970 rptr[i+1] = rptr[i] + ncols; 1971 if (rownz_max < ncols) rownz_max = ncols; 1972 } 1973 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1974 } else { /* only copy matrix values into sbuf_a */ 1975 rptr = sbuf_j; 1976 vals = sbuf_a; 1977 rptr[0] = 0; 1978 for (i=0; i<rend-rstart; i++){ 1979 row = i + rstart; 1980 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1981 ncols = nzA + nzB; 1982 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1983 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1984 lwrite = 0; 1985 for (l=0; l<nzB; l++) { 1986 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1987 } 1988 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1989 for (l=0; l<nzB; l++) { 1990 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1991 } 1992 vals += ncols; 1993 rptr[i+1] = rptr[i] + ncols; 1994 } 1995 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1996 1997 /* send nzlocal to others, and recv other's nzlocal */ 1998 /*--------------------------------------------------*/ 1999 if (reuse == MAT_INITIAL_MATRIX){ 2000 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2001 s_waits2 = s_waits3 + nsends; 2002 s_waits1 = s_waits2 + nsends; 2003 r_waits1 = s_waits1 + nsends; 2004 r_waits2 = r_waits1 + nrecvs; 2005 r_waits3 = r_waits2 + nrecvs; 2006 } else { 2007 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2008 r_waits3 = s_waits3 + nsends; 2009 } 2010 2011 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2012 if (reuse == MAT_INITIAL_MATRIX){ 2013 /* get new tags to keep the communication clean */ 2014 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2015 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2016 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2017 rbuf_nz = sbuf_nz + nsends; 2018 2019 /* post receives of other's nzlocal */ 2020 for (i=0; i<nrecvs; i++){ 2021 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2022 } 2023 /* send nzlocal to others */ 2024 for (i=0; i<nsends; i++){ 2025 sbuf_nz[i] = nzlocal; 2026 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2027 } 2028 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2029 count = nrecvs; 2030 while (count) { 2031 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2032 recv_rank[imdex] = recv_status.MPI_SOURCE; 2033 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2034 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2035 2036 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2037 rbuf_nz[imdex] += i + 2; 2038 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2039 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2040 count--; 2041 } 2042 /* wait on sends of nzlocal */ 2043 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2044 /* send mat->i,j to others, and recv from other's */ 2045 /*------------------------------------------------*/ 2046 for (i=0; i<nsends; i++){ 2047 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2048 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2049 } 2050 /* wait on receives of mat->i,j */ 2051 /*------------------------------*/ 2052 count = nrecvs; 2053 while (count) { 2054 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2055 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2056 count--; 2057 } 2058 /* wait on sends of mat->i,j */ 2059 /*---------------------------*/ 2060 if (nsends) { 2061 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2062 } 2063 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2064 2065 /* post receives, send and receive mat->a */ 2066 /*----------------------------------------*/ 2067 for (imdex=0; imdex<nrecvs; imdex++) { 2068 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2069 } 2070 for (i=0; i<nsends; i++){ 2071 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2072 } 2073 count = nrecvs; 2074 while (count) { 2075 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2076 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2077 count--; 2078 } 2079 if (nsends) { 2080 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2081 } 2082 2083 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2084 2085 /* create redundant matrix */ 2086 /*-------------------------*/ 2087 if (reuse == MAT_INITIAL_MATRIX){ 2088 /* compute rownz_max for preallocation */ 2089 for (imdex=0; imdex<nrecvs; imdex++){ 2090 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2091 rptr = rbuf_j[imdex]; 2092 for (i=0; i<j; i++){ 2093 ncols = rptr[i+1] - rptr[i]; 2094 if (rownz_max < ncols) rownz_max = ncols; 2095 } 2096 } 2097 2098 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2099 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2100 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2101 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2102 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2103 } else { 2104 C = *matredundant; 2105 } 2106 2107 /* insert local matrix entries */ 2108 rptr = sbuf_j; 2109 cols = sbuf_j + rend-rstart + 1; 2110 vals = sbuf_a; 2111 for (i=0; i<rend-rstart; i++){ 2112 row = i + rstart; 2113 ncols = rptr[i+1] - rptr[i]; 2114 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2115 vals += ncols; 2116 cols += ncols; 2117 } 2118 /* insert received matrix entries */ 2119 for (imdex=0; imdex<nrecvs; imdex++){ 2120 rstart = rowrange[recv_rank[imdex]]; 2121 rend = rowrange[recv_rank[imdex]+1]; 2122 rptr = rbuf_j[imdex]; 2123 cols = rbuf_j[imdex] + rend-rstart + 1; 2124 vals = rbuf_a[imdex]; 2125 for (i=0; i<rend-rstart; i++){ 2126 row = i + rstart; 2127 ncols = rptr[i+1] - rptr[i]; 2128 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2129 vals += ncols; 2130 cols += ncols; 2131 } 2132 } 2133 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2134 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2135 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2136 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscContainer container; 2139 *matredundant = C; 2140 /* create a supporting struct and attach it to C for reuse */ 2141 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2142 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2143 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2144 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2145 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2146 2147 redund->nzlocal = nzlocal; 2148 redund->nsends = nsends; 2149 redund->nrecvs = nrecvs; 2150 redund->send_rank = send_rank; 2151 redund->sbuf_nz = sbuf_nz; 2152 redund->sbuf_j = sbuf_j; 2153 redund->sbuf_a = sbuf_a; 2154 redund->rbuf_j = rbuf_j; 2155 redund->rbuf_a = rbuf_a; 2156 2157 redund->MatDestroy = C->ops->destroy; 2158 C->ops->destroy = MatDestroy_MatRedundant; 2159 } 2160 PetscFunctionReturn(0); 2161 } 2162 2163 /* -------------------------------------------------------------------*/ 2164 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2165 MatGetRow_MPIAIJ, 2166 MatRestoreRow_MPIAIJ, 2167 MatMult_MPIAIJ, 2168 /* 4*/ MatMultAdd_MPIAIJ, 2169 MatMultTranspose_MPIAIJ, 2170 MatMultTransposeAdd_MPIAIJ, 2171 #ifdef PETSC_HAVE_PBGL 2172 MatSolve_MPIAIJ, 2173 #else 2174 0, 2175 #endif 2176 0, 2177 0, 2178 /*10*/ 0, 2179 0, 2180 0, 2181 MatRelax_MPIAIJ, 2182 MatTranspose_MPIAIJ, 2183 /*15*/ MatGetInfo_MPIAIJ, 2184 MatEqual_MPIAIJ, 2185 MatGetDiagonal_MPIAIJ, 2186 MatDiagonalScale_MPIAIJ, 2187 MatNorm_MPIAIJ, 2188 /*20*/ MatAssemblyBegin_MPIAIJ, 2189 MatAssemblyEnd_MPIAIJ, 2190 0, 2191 MatSetOption_MPIAIJ, 2192 MatZeroEntries_MPIAIJ, 2193 /*25*/ MatZeroRows_MPIAIJ, 2194 0, 2195 #ifdef PETSC_HAVE_PBGL 2196 MatLUFactorNumeric_MPIAIJ, 2197 #else 2198 0, 2199 #endif 2200 0, 2201 0, 2202 /*30*/ MatSetUpPreallocation_MPIAIJ, 2203 #ifdef PETSC_HAVE_PBGL 2204 MatILUFactorSymbolic_MPIAIJ, 2205 #else 2206 0, 2207 #endif 2208 0, 2209 0, 2210 0, 2211 /*35*/ MatDuplicate_MPIAIJ, 2212 0, 2213 0, 2214 0, 2215 0, 2216 /*40*/ MatAXPY_MPIAIJ, 2217 MatGetSubMatrices_MPIAIJ, 2218 MatIncreaseOverlap_MPIAIJ, 2219 MatGetValues_MPIAIJ, 2220 MatCopy_MPIAIJ, 2221 /*45*/ 0, 2222 MatScale_MPIAIJ, 2223 0, 2224 0, 2225 0, 2226 /*50*/ MatSetBlockSize_MPIAIJ, 2227 0, 2228 0, 2229 0, 2230 0, 2231 /*55*/ MatFDColoringCreate_MPIAIJ, 2232 0, 2233 MatSetUnfactored_MPIAIJ, 2234 MatPermute_MPIAIJ, 2235 0, 2236 /*60*/ MatGetSubMatrix_MPIAIJ, 2237 MatDestroy_MPIAIJ, 2238 MatView_MPIAIJ, 2239 0, 2240 0, 2241 /*65*/ 0, 2242 0, 2243 0, 2244 0, 2245 0, 2246 /*70*/ 0, 2247 0, 2248 MatSetColoring_MPIAIJ, 2249 #if defined(PETSC_HAVE_ADIC) 2250 MatSetValuesAdic_MPIAIJ, 2251 #else 2252 0, 2253 #endif 2254 MatSetValuesAdifor_MPIAIJ, 2255 /*75*/ 0, 2256 0, 2257 0, 2258 0, 2259 0, 2260 /*80*/ 0, 2261 0, 2262 0, 2263 0, 2264 /*84*/ MatLoad_MPIAIJ, 2265 0, 2266 0, 2267 0, 2268 0, 2269 0, 2270 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2271 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2272 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2273 MatPtAP_Basic, 2274 MatPtAPSymbolic_MPIAIJ, 2275 /*95*/ MatPtAPNumeric_MPIAIJ, 2276 0, 2277 0, 2278 0, 2279 0, 2280 /*100*/0, 2281 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2282 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2283 MatConjugate_MPIAIJ, 2284 0, 2285 /*105*/MatSetValuesRow_MPIAIJ, 2286 MatRealPart_MPIAIJ, 2287 MatImaginaryPart_MPIAIJ, 2288 0, 2289 0, 2290 /*110*/0, 2291 MatGetRedundantMatrix_MPIAIJ}; 2292 2293 /* ----------------------------------------------------------------------------------------*/ 2294 2295 EXTERN_C_BEGIN 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2298 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2299 { 2300 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2301 PetscErrorCode ierr; 2302 2303 PetscFunctionBegin; 2304 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2305 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 EXTERN_C_END 2309 2310 EXTERN_C_BEGIN 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2313 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2314 { 2315 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2316 PetscErrorCode ierr; 2317 2318 PetscFunctionBegin; 2319 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2320 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2321 PetscFunctionReturn(0); 2322 } 2323 EXTERN_C_END 2324 2325 #include "petscpc.h" 2326 EXTERN_C_BEGIN 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2329 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2330 { 2331 Mat_MPIAIJ *b; 2332 PetscErrorCode ierr; 2333 PetscInt i; 2334 2335 PetscFunctionBegin; 2336 B->preallocated = PETSC_TRUE; 2337 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2338 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2339 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2340 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2341 2342 B->rmap.bs = B->cmap.bs = 1; 2343 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2344 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2345 if (d_nnz) { 2346 for (i=0; i<B->rmap.n; i++) { 2347 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2348 } 2349 } 2350 if (o_nnz) { 2351 for (i=0; i<B->rmap.n; i++) { 2352 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2353 } 2354 } 2355 b = (Mat_MPIAIJ*)B->data; 2356 2357 /* Explicitly create 2 MATSEQAIJ matrices. */ 2358 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2359 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2360 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2361 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2362 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2363 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2364 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2365 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2366 2367 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2368 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2369 2370 PetscFunctionReturn(0); 2371 } 2372 EXTERN_C_END 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2376 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2377 { 2378 Mat mat; 2379 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2380 PetscErrorCode ierr; 2381 2382 PetscFunctionBegin; 2383 *newmat = 0; 2384 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2385 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2386 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2387 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2388 a = (Mat_MPIAIJ*)mat->data; 2389 2390 mat->factor = matin->factor; 2391 mat->rmap.bs = matin->rmap.bs; 2392 mat->assembled = PETSC_TRUE; 2393 mat->insertmode = NOT_SET_VALUES; 2394 mat->preallocated = PETSC_TRUE; 2395 2396 a->size = oldmat->size; 2397 a->rank = oldmat->rank; 2398 a->donotstash = oldmat->donotstash; 2399 a->roworiented = oldmat->roworiented; 2400 a->rowindices = 0; 2401 a->rowvalues = 0; 2402 a->getrowactive = PETSC_FALSE; 2403 2404 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2405 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2406 2407 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2408 if (oldmat->colmap) { 2409 #if defined (PETSC_USE_CTABLE) 2410 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2411 #else 2412 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2413 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2414 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2415 #endif 2416 } else a->colmap = 0; 2417 if (oldmat->garray) { 2418 PetscInt len; 2419 len = oldmat->B->cmap.n; 2420 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2421 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2422 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2423 } else a->garray = 0; 2424 2425 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2426 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2427 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2428 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2429 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2430 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2431 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2432 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2433 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2434 *newmat = mat; 2435 PetscFunctionReturn(0); 2436 } 2437 2438 #include "petscsys.h" 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatLoad_MPIAIJ" 2442 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2443 { 2444 Mat A; 2445 PetscScalar *vals,*svals; 2446 MPI_Comm comm = ((PetscObject)viewer)->comm; 2447 MPI_Status status; 2448 PetscErrorCode ierr; 2449 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2450 PetscInt i,nz,j,rstart,rend,mmax; 2451 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2452 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2453 PetscInt cend,cstart,n,*rowners; 2454 int fd; 2455 2456 PetscFunctionBegin; 2457 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2458 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2459 if (!rank) { 2460 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2461 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2462 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2463 } 2464 2465 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2466 M = header[1]; N = header[2]; 2467 /* determine ownership of all rows */ 2468 m = M/size + ((M % size) > rank); 2469 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2470 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2471 2472 /* First process needs enough room for process with most rows */ 2473 if (!rank) { 2474 mmax = rowners[1]; 2475 for (i=2; i<size; i++) { 2476 mmax = PetscMax(mmax,rowners[i]); 2477 } 2478 } else mmax = m; 2479 2480 rowners[0] = 0; 2481 for (i=2; i<=size; i++) { 2482 rowners[i] += rowners[i-1]; 2483 } 2484 rstart = rowners[rank]; 2485 rend = rowners[rank+1]; 2486 2487 /* distribute row lengths to all processors */ 2488 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2489 if (!rank) { 2490 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2491 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2492 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2493 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2494 for (j=0; j<m; j++) { 2495 procsnz[0] += ourlens[j]; 2496 } 2497 for (i=1; i<size; i++) { 2498 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2499 /* calculate the number of nonzeros on each processor */ 2500 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2501 procsnz[i] += rowlengths[j]; 2502 } 2503 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2504 } 2505 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2506 } else { 2507 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2508 } 2509 2510 if (!rank) { 2511 /* determine max buffer needed and allocate it */ 2512 maxnz = 0; 2513 for (i=0; i<size; i++) { 2514 maxnz = PetscMax(maxnz,procsnz[i]); 2515 } 2516 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2517 2518 /* read in my part of the matrix column indices */ 2519 nz = procsnz[0]; 2520 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2521 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2522 2523 /* read in every one elses and ship off */ 2524 for (i=1; i<size; i++) { 2525 nz = procsnz[i]; 2526 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2527 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2528 } 2529 ierr = PetscFree(cols);CHKERRQ(ierr); 2530 } else { 2531 /* determine buffer space needed for message */ 2532 nz = 0; 2533 for (i=0; i<m; i++) { 2534 nz += ourlens[i]; 2535 } 2536 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2537 2538 /* receive message of column indices*/ 2539 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2540 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2541 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2542 } 2543 2544 /* determine column ownership if matrix is not square */ 2545 if (N != M) { 2546 n = N/size + ((N % size) > rank); 2547 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2548 cstart = cend - n; 2549 } else { 2550 cstart = rstart; 2551 cend = rend; 2552 n = cend - cstart; 2553 } 2554 2555 /* loop over local rows, determining number of off diagonal entries */ 2556 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2557 jj = 0; 2558 for (i=0; i<m; i++) { 2559 for (j=0; j<ourlens[i]; j++) { 2560 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2561 jj++; 2562 } 2563 } 2564 2565 /* create our matrix */ 2566 for (i=0; i<m; i++) { 2567 ourlens[i] -= offlens[i]; 2568 } 2569 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2570 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2571 ierr = MatSetType(A,type);CHKERRQ(ierr); 2572 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2573 2574 for (i=0; i<m; i++) { 2575 ourlens[i] += offlens[i]; 2576 } 2577 2578 if (!rank) { 2579 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2580 2581 /* read in my part of the matrix numerical values */ 2582 nz = procsnz[0]; 2583 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2584 2585 /* insert into matrix */ 2586 jj = rstart; 2587 smycols = mycols; 2588 svals = vals; 2589 for (i=0; i<m; i++) { 2590 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2591 smycols += ourlens[i]; 2592 svals += ourlens[i]; 2593 jj++; 2594 } 2595 2596 /* read in other processors and ship out */ 2597 for (i=1; i<size; i++) { 2598 nz = procsnz[i]; 2599 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2600 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2601 } 2602 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2603 } else { 2604 /* receive numeric values */ 2605 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2606 2607 /* receive message of values*/ 2608 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2609 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2610 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2611 2612 /* insert into matrix */ 2613 jj = rstart; 2614 smycols = mycols; 2615 svals = vals; 2616 for (i=0; i<m; i++) { 2617 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2618 smycols += ourlens[i]; 2619 svals += ourlens[i]; 2620 jj++; 2621 } 2622 } 2623 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2624 ierr = PetscFree(vals);CHKERRQ(ierr); 2625 ierr = PetscFree(mycols);CHKERRQ(ierr); 2626 ierr = PetscFree(rowners);CHKERRQ(ierr); 2627 2628 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2630 *newmat = A; 2631 PetscFunctionReturn(0); 2632 } 2633 2634 #undef __FUNCT__ 2635 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2636 /* 2637 Not great since it makes two copies of the submatrix, first an SeqAIJ 2638 in local and then by concatenating the local matrices the end result. 2639 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2640 */ 2641 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2642 { 2643 PetscErrorCode ierr; 2644 PetscMPIInt rank,size; 2645 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2646 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2647 Mat *local,M,Mreuse; 2648 PetscScalar *vwork,*aa; 2649 MPI_Comm comm = mat->comm; 2650 Mat_SeqAIJ *aij; 2651 2652 2653 PetscFunctionBegin; 2654 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2656 2657 if (call == MAT_REUSE_MATRIX) { 2658 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2659 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2660 local = &Mreuse; 2661 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2662 } else { 2663 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2664 Mreuse = *local; 2665 ierr = PetscFree(local);CHKERRQ(ierr); 2666 } 2667 2668 /* 2669 m - number of local rows 2670 n - number of columns (same on all processors) 2671 rstart - first row in new global matrix generated 2672 */ 2673 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2674 if (call == MAT_INITIAL_MATRIX) { 2675 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2676 ii = aij->i; 2677 jj = aij->j; 2678 2679 /* 2680 Determine the number of non-zeros in the diagonal and off-diagonal 2681 portions of the matrix in order to do correct preallocation 2682 */ 2683 2684 /* first get start and end of "diagonal" columns */ 2685 if (csize == PETSC_DECIDE) { 2686 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2687 if (mglobal == n) { /* square matrix */ 2688 nlocal = m; 2689 } else { 2690 nlocal = n/size + ((n % size) > rank); 2691 } 2692 } else { 2693 nlocal = csize; 2694 } 2695 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2696 rstart = rend - nlocal; 2697 if (rank == size - 1 && rend != n) { 2698 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2699 } 2700 2701 /* next, compute all the lengths */ 2702 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2703 olens = dlens + m; 2704 for (i=0; i<m; i++) { 2705 jend = ii[i+1] - ii[i]; 2706 olen = 0; 2707 dlen = 0; 2708 for (j=0; j<jend; j++) { 2709 if (*jj < rstart || *jj >= rend) olen++; 2710 else dlen++; 2711 jj++; 2712 } 2713 olens[i] = olen; 2714 dlens[i] = dlen; 2715 } 2716 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2717 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2718 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2719 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2720 ierr = PetscFree(dlens);CHKERRQ(ierr); 2721 } else { 2722 PetscInt ml,nl; 2723 2724 M = *newmat; 2725 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2726 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2727 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2728 /* 2729 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2730 rather than the slower MatSetValues(). 2731 */ 2732 M->was_assembled = PETSC_TRUE; 2733 M->assembled = PETSC_FALSE; 2734 } 2735 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2736 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2737 ii = aij->i; 2738 jj = aij->j; 2739 aa = aij->a; 2740 for (i=0; i<m; i++) { 2741 row = rstart + i; 2742 nz = ii[i+1] - ii[i]; 2743 cwork = jj; jj += nz; 2744 vwork = aa; aa += nz; 2745 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2746 } 2747 2748 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2749 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 *newmat = M; 2751 2752 /* save submatrix used in processor for next request */ 2753 if (call == MAT_INITIAL_MATRIX) { 2754 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2755 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2756 } 2757 2758 PetscFunctionReturn(0); 2759 } 2760 2761 EXTERN_C_BEGIN 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2764 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2765 { 2766 PetscInt m,cstart, cend,j,nnz,i,d; 2767 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2768 const PetscInt *JJ; 2769 PetscScalar *values; 2770 PetscErrorCode ierr; 2771 2772 PetscFunctionBegin; 2773 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2774 2775 B->rmap.bs = B->cmap.bs = 1; 2776 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2777 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2778 m = B->rmap.n; 2779 cstart = B->cmap.rstart; 2780 cend = B->cmap.rend; 2781 rstart = B->rmap.rstart; 2782 2783 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2784 o_nnz = d_nnz + m; 2785 2786 for (i=0; i<m; i++) { 2787 nnz = Ii[i+1]- Ii[i]; 2788 JJ = J + Ii[i]; 2789 nnz_max = PetscMax(nnz_max,nnz); 2790 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2791 for (j=0; j<nnz; j++) { 2792 if (*JJ >= cstart) break; 2793 JJ++; 2794 } 2795 d = 0; 2796 for (; j<nnz; j++) { 2797 if (*JJ++ >= cend) break; 2798 d++; 2799 } 2800 d_nnz[i] = d; 2801 o_nnz[i] = nnz - d; 2802 } 2803 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2804 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2805 2806 if (v) values = (PetscScalar*)v; 2807 else { 2808 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2809 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2810 } 2811 2812 for (i=0; i<m; i++) { 2813 ii = i + rstart; 2814 nnz = Ii[i+1]- Ii[i]; 2815 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2816 } 2817 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2818 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 2820 if (!v) { 2821 ierr = PetscFree(values);CHKERRQ(ierr); 2822 } 2823 PetscFunctionReturn(0); 2824 } 2825 EXTERN_C_END 2826 2827 #undef __FUNCT__ 2828 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2829 /*@ 2830 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2831 (the default parallel PETSc format). 2832 2833 Collective on MPI_Comm 2834 2835 Input Parameters: 2836 + B - the matrix 2837 . i - the indices into j for the start of each local row (starts with zero) 2838 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2839 - v - optional values in the matrix 2840 2841 Level: developer 2842 2843 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2844 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2845 2846 .keywords: matrix, aij, compressed row, sparse, parallel 2847 2848 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2849 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2850 @*/ 2851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2852 { 2853 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2854 2855 PetscFunctionBegin; 2856 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2857 if (f) { 2858 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2859 } 2860 PetscFunctionReturn(0); 2861 } 2862 2863 #undef __FUNCT__ 2864 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2865 /*@C 2866 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2867 (the default parallel PETSc format). For good matrix assembly performance 2868 the user should preallocate the matrix storage by setting the parameters 2869 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2870 performance can be increased by more than a factor of 50. 2871 2872 Collective on MPI_Comm 2873 2874 Input Parameters: 2875 + A - the matrix 2876 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2877 (same value is used for all local rows) 2878 . d_nnz - array containing the number of nonzeros in the various rows of the 2879 DIAGONAL portion of the local submatrix (possibly different for each row) 2880 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2881 The size of this array is equal to the number of local rows, i.e 'm'. 2882 You must leave room for the diagonal entry even if it is zero. 2883 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2884 submatrix (same value is used for all local rows). 2885 - o_nnz - array containing the number of nonzeros in the various rows of the 2886 OFF-DIAGONAL portion of the local submatrix (possibly different for 2887 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2888 structure. The size of this array is equal to the number 2889 of local rows, i.e 'm'. 2890 2891 If the *_nnz parameter is given then the *_nz parameter is ignored 2892 2893 The AIJ format (also called the Yale sparse matrix format or 2894 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2895 storage. The stored row and column indices begin with zero. See the users manual for details. 2896 2897 The parallel matrix is partitioned such that the first m0 rows belong to 2898 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2899 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2900 2901 The DIAGONAL portion of the local submatrix of a processor can be defined 2902 as the submatrix which is obtained by extraction the part corresponding 2903 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2904 first row that belongs to the processor, and r2 is the last row belonging 2905 to the this processor. This is a square mxm matrix. The remaining portion 2906 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2907 2908 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2909 2910 Example usage: 2911 2912 Consider the following 8x8 matrix with 34 non-zero values, that is 2913 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2914 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2915 as follows: 2916 2917 .vb 2918 1 2 0 | 0 3 0 | 0 4 2919 Proc0 0 5 6 | 7 0 0 | 8 0 2920 9 0 10 | 11 0 0 | 12 0 2921 ------------------------------------- 2922 13 0 14 | 15 16 17 | 0 0 2923 Proc1 0 18 0 | 19 20 21 | 0 0 2924 0 0 0 | 22 23 0 | 24 0 2925 ------------------------------------- 2926 Proc2 25 26 27 | 0 0 28 | 29 0 2927 30 0 0 | 31 32 33 | 0 34 2928 .ve 2929 2930 This can be represented as a collection of submatrices as: 2931 2932 .vb 2933 A B C 2934 D E F 2935 G H I 2936 .ve 2937 2938 Where the submatrices A,B,C are owned by proc0, D,E,F are 2939 owned by proc1, G,H,I are owned by proc2. 2940 2941 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2942 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2943 The 'M','N' parameters are 8,8, and have the same values on all procs. 2944 2945 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2946 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2947 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2948 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2949 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2950 matrix, ans [DF] as another SeqAIJ matrix. 2951 2952 When d_nz, o_nz parameters are specified, d_nz storage elements are 2953 allocated for every row of the local diagonal submatrix, and o_nz 2954 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2955 One way to choose d_nz and o_nz is to use the max nonzerors per local 2956 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2957 In this case, the values of d_nz,o_nz are: 2958 .vb 2959 proc0 : dnz = 2, o_nz = 2 2960 proc1 : dnz = 3, o_nz = 2 2961 proc2 : dnz = 1, o_nz = 4 2962 .ve 2963 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2964 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2965 for proc3. i.e we are using 12+15+10=37 storage locations to store 2966 34 values. 2967 2968 When d_nnz, o_nnz parameters are specified, the storage is specified 2969 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2970 In the above case the values for d_nnz,o_nnz are: 2971 .vb 2972 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2973 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2974 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2975 .ve 2976 Here the space allocated is sum of all the above values i.e 34, and 2977 hence pre-allocation is perfect. 2978 2979 Level: intermediate 2980 2981 .keywords: matrix, aij, compressed row, sparse, parallel 2982 2983 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2984 MPIAIJ 2985 @*/ 2986 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2987 { 2988 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2989 2990 PetscFunctionBegin; 2991 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2992 if (f) { 2993 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2994 } 2995 PetscFunctionReturn(0); 2996 } 2997 2998 #undef __FUNCT__ 2999 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3000 /*@C 3001 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3002 CSR format the local rows. 3003 3004 Collective on MPI_Comm 3005 3006 Input Parameters: 3007 + comm - MPI communicator 3008 . m - number of local rows (Cannot be PETSC_DECIDE) 3009 . n - This value should be the same as the local size used in creating the 3010 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3011 calculated if N is given) For square matrices n is almost always m. 3012 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3013 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3014 . i - row indices 3015 . j - column indices 3016 - a - matrix values 3017 3018 Output Parameter: 3019 . mat - the matrix 3020 3021 Level: intermediate 3022 3023 Notes: 3024 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3025 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3026 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3027 3028 The i and j indices are 0 based 3029 3030 .keywords: matrix, aij, compressed row, sparse, parallel 3031 3032 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3033 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3034 @*/ 3035 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3036 { 3037 PetscErrorCode ierr; 3038 3039 PetscFunctionBegin; 3040 if (i[0]) { 3041 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3042 } 3043 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3044 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3045 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3046 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3047 PetscFunctionReturn(0); 3048 } 3049 3050 #undef __FUNCT__ 3051 #define __FUNCT__ "MatCreateMPIAIJ" 3052 /*@C 3053 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3054 (the default parallel PETSc format). For good matrix assembly performance 3055 the user should preallocate the matrix storage by setting the parameters 3056 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3057 performance can be increased by more than a factor of 50. 3058 3059 Collective on MPI_Comm 3060 3061 Input Parameters: 3062 + comm - MPI communicator 3063 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3064 This value should be the same as the local size used in creating the 3065 y vector for the matrix-vector product y = Ax. 3066 . n - This value should be the same as the local size used in creating the 3067 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3068 calculated if N is given) For square matrices n is almost always m. 3069 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3070 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3071 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3072 (same value is used for all local rows) 3073 . d_nnz - array containing the number of nonzeros in the various rows of the 3074 DIAGONAL portion of the local submatrix (possibly different for each row) 3075 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3076 The size of this array is equal to the number of local rows, i.e 'm'. 3077 You must leave room for the diagonal entry even if it is zero. 3078 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3079 submatrix (same value is used for all local rows). 3080 - o_nnz - array containing the number of nonzeros in the various rows of the 3081 OFF-DIAGONAL portion of the local submatrix (possibly different for 3082 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3083 structure. The size of this array is equal to the number 3084 of local rows, i.e 'm'. 3085 3086 Output Parameter: 3087 . A - the matrix 3088 3089 Notes: 3090 If the *_nnz parameter is given then the *_nz parameter is ignored 3091 3092 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3093 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3094 storage requirements for this matrix. 3095 3096 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3097 processor than it must be used on all processors that share the object for 3098 that argument. 3099 3100 The user MUST specify either the local or global matrix dimensions 3101 (possibly both). 3102 3103 The parallel matrix is partitioned across processors such that the 3104 first m0 rows belong to process 0, the next m1 rows belong to 3105 process 1, the next m2 rows belong to process 2 etc.. where 3106 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3107 values corresponding to [m x N] submatrix. 3108 3109 The columns are logically partitioned with the n0 columns belonging 3110 to 0th partition, the next n1 columns belonging to the next 3111 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3112 3113 The DIAGONAL portion of the local submatrix on any given processor 3114 is the submatrix corresponding to the rows and columns m,n 3115 corresponding to the given processor. i.e diagonal matrix on 3116 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3117 etc. The remaining portion of the local submatrix [m x (N-n)] 3118 constitute the OFF-DIAGONAL portion. The example below better 3119 illustrates this concept. 3120 3121 For a square global matrix we define each processor's diagonal portion 3122 to be its local rows and the corresponding columns (a square submatrix); 3123 each processor's off-diagonal portion encompasses the remainder of the 3124 local matrix (a rectangular submatrix). 3125 3126 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3127 3128 When calling this routine with a single process communicator, a matrix of 3129 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3130 type of communicator, use the construction mechanism: 3131 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3132 3133 By default, this format uses inodes (identical nodes) when possible. 3134 We search for consecutive rows with the same nonzero structure, thereby 3135 reusing matrix information to achieve increased efficiency. 3136 3137 Options Database Keys: 3138 + -mat_no_inode - Do not use inodes 3139 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3140 - -mat_aij_oneindex - Internally use indexing starting at 1 3141 rather than 0. Note that when calling MatSetValues(), 3142 the user still MUST index entries starting at 0! 3143 3144 3145 Example usage: 3146 3147 Consider the following 8x8 matrix with 34 non-zero values, that is 3148 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3149 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3150 as follows: 3151 3152 .vb 3153 1 2 0 | 0 3 0 | 0 4 3154 Proc0 0 5 6 | 7 0 0 | 8 0 3155 9 0 10 | 11 0 0 | 12 0 3156 ------------------------------------- 3157 13 0 14 | 15 16 17 | 0 0 3158 Proc1 0 18 0 | 19 20 21 | 0 0 3159 0 0 0 | 22 23 0 | 24 0 3160 ------------------------------------- 3161 Proc2 25 26 27 | 0 0 28 | 29 0 3162 30 0 0 | 31 32 33 | 0 34 3163 .ve 3164 3165 This can be represented as a collection of submatrices as: 3166 3167 .vb 3168 A B C 3169 D E F 3170 G H I 3171 .ve 3172 3173 Where the submatrices A,B,C are owned by proc0, D,E,F are 3174 owned by proc1, G,H,I are owned by proc2. 3175 3176 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3177 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3178 The 'M','N' parameters are 8,8, and have the same values on all procs. 3179 3180 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3181 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3182 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3183 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3184 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3185 matrix, ans [DF] as another SeqAIJ matrix. 3186 3187 When d_nz, o_nz parameters are specified, d_nz storage elements are 3188 allocated for every row of the local diagonal submatrix, and o_nz 3189 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3190 One way to choose d_nz and o_nz is to use the max nonzerors per local 3191 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3192 In this case, the values of d_nz,o_nz are: 3193 .vb 3194 proc0 : dnz = 2, o_nz = 2 3195 proc1 : dnz = 3, o_nz = 2 3196 proc2 : dnz = 1, o_nz = 4 3197 .ve 3198 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3199 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3200 for proc3. i.e we are using 12+15+10=37 storage locations to store 3201 34 values. 3202 3203 When d_nnz, o_nnz parameters are specified, the storage is specified 3204 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3205 In the above case the values for d_nnz,o_nnz are: 3206 .vb 3207 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3208 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3209 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3210 .ve 3211 Here the space allocated is sum of all the above values i.e 34, and 3212 hence pre-allocation is perfect. 3213 3214 Level: intermediate 3215 3216 .keywords: matrix, aij, compressed row, sparse, parallel 3217 3218 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3219 MPIAIJ, MatCreateMPIAIJWithArrays() 3220 @*/ 3221 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3222 { 3223 PetscErrorCode ierr; 3224 PetscMPIInt size; 3225 3226 PetscFunctionBegin; 3227 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3228 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3229 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3230 if (size > 1) { 3231 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3232 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3233 } else { 3234 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3235 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3236 } 3237 PetscFunctionReturn(0); 3238 } 3239 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3242 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3243 { 3244 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3245 3246 PetscFunctionBegin; 3247 *Ad = a->A; 3248 *Ao = a->B; 3249 *colmap = a->garray; 3250 PetscFunctionReturn(0); 3251 } 3252 3253 #undef __FUNCT__ 3254 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3255 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3256 { 3257 PetscErrorCode ierr; 3258 PetscInt i; 3259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3260 3261 PetscFunctionBegin; 3262 if (coloring->ctype == IS_COLORING_GLOBAL) { 3263 ISColoringValue *allcolors,*colors; 3264 ISColoring ocoloring; 3265 3266 /* set coloring for diagonal portion */ 3267 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3268 3269 /* set coloring for off-diagonal portion */ 3270 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3271 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3272 for (i=0; i<a->B->cmap.n; i++) { 3273 colors[i] = allcolors[a->garray[i]]; 3274 } 3275 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3276 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3277 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3278 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3279 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3280 ISColoringValue *colors; 3281 PetscInt *larray; 3282 ISColoring ocoloring; 3283 3284 /* set coloring for diagonal portion */ 3285 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3286 for (i=0; i<a->A->cmap.n; i++) { 3287 larray[i] = i + A->cmap.rstart; 3288 } 3289 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3290 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3291 for (i=0; i<a->A->cmap.n; i++) { 3292 colors[i] = coloring->colors[larray[i]]; 3293 } 3294 ierr = PetscFree(larray);CHKERRQ(ierr); 3295 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3296 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3297 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3298 3299 /* set coloring for off-diagonal portion */ 3300 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3301 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3302 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3303 for (i=0; i<a->B->cmap.n; i++) { 3304 colors[i] = coloring->colors[larray[i]]; 3305 } 3306 ierr = PetscFree(larray);CHKERRQ(ierr); 3307 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3308 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3309 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3310 } else { 3311 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3312 } 3313 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #if defined(PETSC_HAVE_ADIC) 3318 #undef __FUNCT__ 3319 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3320 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3321 { 3322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3323 PetscErrorCode ierr; 3324 3325 PetscFunctionBegin; 3326 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3327 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 #endif 3331 3332 #undef __FUNCT__ 3333 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3334 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3335 { 3336 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3337 PetscErrorCode ierr; 3338 3339 PetscFunctionBegin; 3340 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3341 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3342 PetscFunctionReturn(0); 3343 } 3344 3345 #undef __FUNCT__ 3346 #define __FUNCT__ "MatMerge" 3347 /*@C 3348 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3349 matrices from each processor 3350 3351 Collective on MPI_Comm 3352 3353 Input Parameters: 3354 + comm - the communicators the parallel matrix will live on 3355 . inmat - the input sequential matrices 3356 . n - number of local columns (or PETSC_DECIDE) 3357 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3358 3359 Output Parameter: 3360 . outmat - the parallel matrix generated 3361 3362 Level: advanced 3363 3364 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3365 3366 @*/ 3367 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3368 { 3369 PetscErrorCode ierr; 3370 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3371 PetscInt *indx; 3372 PetscScalar *values; 3373 3374 PetscFunctionBegin; 3375 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3376 if (scall == MAT_INITIAL_MATRIX){ 3377 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3378 if (n == PETSC_DECIDE){ 3379 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3380 } 3381 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3382 rstart -= m; 3383 3384 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3385 for (i=0;i<m;i++) { 3386 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3387 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3388 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3389 } 3390 /* This routine will ONLY return MPIAIJ type matrix */ 3391 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3392 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3393 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3394 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3395 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3396 3397 } else if (scall == MAT_REUSE_MATRIX){ 3398 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3399 } else { 3400 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3401 } 3402 3403 for (i=0;i<m;i++) { 3404 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3405 Ii = i + rstart; 3406 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3407 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3408 } 3409 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3410 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3411 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3412 3413 PetscFunctionReturn(0); 3414 } 3415 3416 #undef __FUNCT__ 3417 #define __FUNCT__ "MatFileSplit" 3418 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3419 { 3420 PetscErrorCode ierr; 3421 PetscMPIInt rank; 3422 PetscInt m,N,i,rstart,nnz; 3423 size_t len; 3424 const PetscInt *indx; 3425 PetscViewer out; 3426 char *name; 3427 Mat B; 3428 const PetscScalar *values; 3429 3430 PetscFunctionBegin; 3431 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3432 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3433 /* Should this be the type of the diagonal block of A? */ 3434 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3435 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3436 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3437 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3438 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3439 for (i=0;i<m;i++) { 3440 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3441 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3442 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3443 } 3444 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3445 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3446 3447 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3448 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3449 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3450 sprintf(name,"%s.%d",outfile,rank); 3451 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3452 ierr = PetscFree(name); 3453 ierr = MatView(B,out);CHKERRQ(ierr); 3454 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3455 ierr = MatDestroy(B);CHKERRQ(ierr); 3456 PetscFunctionReturn(0); 3457 } 3458 3459 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3460 #undef __FUNCT__ 3461 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3462 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3463 { 3464 PetscErrorCode ierr; 3465 Mat_Merge_SeqsToMPI *merge; 3466 PetscContainer container; 3467 3468 PetscFunctionBegin; 3469 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3470 if (container) { 3471 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3472 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3473 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3474 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3475 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3476 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3477 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3478 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3479 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3480 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3481 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3482 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3483 3484 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3485 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3486 } 3487 ierr = PetscFree(merge);CHKERRQ(ierr); 3488 3489 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #include "src/mat/utils/freespace.h" 3494 #include "petscbt.h" 3495 static PetscEvent logkey_seqstompinum = 0; 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3498 /*@C 3499 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3500 matrices from each processor 3501 3502 Collective on MPI_Comm 3503 3504 Input Parameters: 3505 + comm - the communicators the parallel matrix will live on 3506 . seqmat - the input sequential matrices 3507 . m - number of local rows (or PETSC_DECIDE) 3508 . n - number of local columns (or PETSC_DECIDE) 3509 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3510 3511 Output Parameter: 3512 . mpimat - the parallel matrix generated 3513 3514 Level: advanced 3515 3516 Notes: 3517 The dimensions of the sequential matrix in each processor MUST be the same. 3518 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3519 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3520 @*/ 3521 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3522 { 3523 PetscErrorCode ierr; 3524 MPI_Comm comm=mpimat->comm; 3525 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3526 PetscMPIInt size,rank,taga,*len_s; 3527 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3528 PetscInt proc,m; 3529 PetscInt **buf_ri,**buf_rj; 3530 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3531 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3532 MPI_Request *s_waits,*r_waits; 3533 MPI_Status *status; 3534 MatScalar *aa=a->a,**abuf_r,*ba_i; 3535 Mat_Merge_SeqsToMPI *merge; 3536 PetscContainer container; 3537 3538 PetscFunctionBegin; 3539 if (!logkey_seqstompinum) { 3540 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3541 } 3542 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3543 3544 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3545 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3546 3547 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3548 if (container) { 3549 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3550 } 3551 bi = merge->bi; 3552 bj = merge->bj; 3553 buf_ri = merge->buf_ri; 3554 buf_rj = merge->buf_rj; 3555 3556 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3557 owners = merge->rowmap.range; 3558 len_s = merge->len_s; 3559 3560 /* send and recv matrix values */ 3561 /*-----------------------------*/ 3562 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3563 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3564 3565 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3566 for (proc=0,k=0; proc<size; proc++){ 3567 if (!len_s[proc]) continue; 3568 i = owners[proc]; 3569 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3570 k++; 3571 } 3572 3573 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3574 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3575 ierr = PetscFree(status);CHKERRQ(ierr); 3576 3577 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3578 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3579 3580 /* insert mat values of mpimat */ 3581 /*----------------------------*/ 3582 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3583 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3584 nextrow = buf_ri_k + merge->nrecv; 3585 nextai = nextrow + merge->nrecv; 3586 3587 for (k=0; k<merge->nrecv; k++){ 3588 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3589 nrows = *(buf_ri_k[k]); 3590 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3591 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3592 } 3593 3594 /* set values of ba */ 3595 m = merge->rowmap.n; 3596 for (i=0; i<m; i++) { 3597 arow = owners[rank] + i; 3598 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3599 bnzi = bi[i+1] - bi[i]; 3600 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3601 3602 /* add local non-zero vals of this proc's seqmat into ba */ 3603 anzi = ai[arow+1] - ai[arow]; 3604 aj = a->j + ai[arow]; 3605 aa = a->a + ai[arow]; 3606 nextaj = 0; 3607 for (j=0; nextaj<anzi; j++){ 3608 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3609 ba_i[j] += aa[nextaj++]; 3610 } 3611 } 3612 3613 /* add received vals into ba */ 3614 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3615 /* i-th row */ 3616 if (i == *nextrow[k]) { 3617 anzi = *(nextai[k]+1) - *nextai[k]; 3618 aj = buf_rj[k] + *(nextai[k]); 3619 aa = abuf_r[k] + *(nextai[k]); 3620 nextaj = 0; 3621 for (j=0; nextaj<anzi; j++){ 3622 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3623 ba_i[j] += aa[nextaj++]; 3624 } 3625 } 3626 nextrow[k]++; nextai[k]++; 3627 } 3628 } 3629 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3630 } 3631 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3633 3634 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3635 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3636 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3637 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3638 PetscFunctionReturn(0); 3639 } 3640 3641 static PetscEvent logkey_seqstompisym = 0; 3642 #undef __FUNCT__ 3643 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3644 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3645 { 3646 PetscErrorCode ierr; 3647 Mat B_mpi; 3648 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3649 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3650 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3651 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3652 PetscInt len,proc,*dnz,*onz; 3653 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3654 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3655 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3656 MPI_Status *status; 3657 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3658 PetscBT lnkbt; 3659 Mat_Merge_SeqsToMPI *merge; 3660 PetscContainer container; 3661 3662 PetscFunctionBegin; 3663 if (!logkey_seqstompisym) { 3664 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3665 } 3666 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3667 3668 /* make sure it is a PETSc comm */ 3669 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3670 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3671 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3672 3673 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3674 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3675 3676 /* determine row ownership */ 3677 /*---------------------------------------------------------*/ 3678 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3679 merge->rowmap.n = m; 3680 merge->rowmap.N = M; 3681 merge->rowmap.bs = 1; 3682 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3683 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3684 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3685 3686 m = merge->rowmap.n; 3687 M = merge->rowmap.N; 3688 owners = merge->rowmap.range; 3689 3690 /* determine the number of messages to send, their lengths */ 3691 /*---------------------------------------------------------*/ 3692 len_s = merge->len_s; 3693 3694 len = 0; /* length of buf_si[] */ 3695 merge->nsend = 0; 3696 for (proc=0; proc<size; proc++){ 3697 len_si[proc] = 0; 3698 if (proc == rank){ 3699 len_s[proc] = 0; 3700 } else { 3701 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3702 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3703 } 3704 if (len_s[proc]) { 3705 merge->nsend++; 3706 nrows = 0; 3707 for (i=owners[proc]; i<owners[proc+1]; i++){ 3708 if (ai[i+1] > ai[i]) nrows++; 3709 } 3710 len_si[proc] = 2*(nrows+1); 3711 len += len_si[proc]; 3712 } 3713 } 3714 3715 /* determine the number and length of messages to receive for ij-structure */ 3716 /*-------------------------------------------------------------------------*/ 3717 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3718 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3719 3720 /* post the Irecv of j-structure */ 3721 /*-------------------------------*/ 3722 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3723 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3724 3725 /* post the Isend of j-structure */ 3726 /*--------------------------------*/ 3727 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3728 sj_waits = si_waits + merge->nsend; 3729 3730 for (proc=0, k=0; proc<size; proc++){ 3731 if (!len_s[proc]) continue; 3732 i = owners[proc]; 3733 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3734 k++; 3735 } 3736 3737 /* receives and sends of j-structure are complete */ 3738 /*------------------------------------------------*/ 3739 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3740 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3741 3742 /* send and recv i-structure */ 3743 /*---------------------------*/ 3744 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3745 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3746 3747 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3748 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3749 for (proc=0,k=0; proc<size; proc++){ 3750 if (!len_s[proc]) continue; 3751 /* form outgoing message for i-structure: 3752 buf_si[0]: nrows to be sent 3753 [1:nrows]: row index (global) 3754 [nrows+1:2*nrows+1]: i-structure index 3755 */ 3756 /*-------------------------------------------*/ 3757 nrows = len_si[proc]/2 - 1; 3758 buf_si_i = buf_si + nrows+1; 3759 buf_si[0] = nrows; 3760 buf_si_i[0] = 0; 3761 nrows = 0; 3762 for (i=owners[proc]; i<owners[proc+1]; i++){ 3763 anzi = ai[i+1] - ai[i]; 3764 if (anzi) { 3765 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3766 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3767 nrows++; 3768 } 3769 } 3770 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3771 k++; 3772 buf_si += len_si[proc]; 3773 } 3774 3775 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3776 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3777 3778 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3779 for (i=0; i<merge->nrecv; i++){ 3780 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3781 } 3782 3783 ierr = PetscFree(len_si);CHKERRQ(ierr); 3784 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3785 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3786 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3787 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3788 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3789 ierr = PetscFree(status);CHKERRQ(ierr); 3790 3791 /* compute a local seq matrix in each processor */ 3792 /*----------------------------------------------*/ 3793 /* allocate bi array and free space for accumulating nonzero column info */ 3794 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3795 bi[0] = 0; 3796 3797 /* create and initialize a linked list */ 3798 nlnk = N+1; 3799 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3800 3801 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3802 len = 0; 3803 len = ai[owners[rank+1]] - ai[owners[rank]]; 3804 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3805 current_space = free_space; 3806 3807 /* determine symbolic info for each local row */ 3808 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3809 nextrow = buf_ri_k + merge->nrecv; 3810 nextai = nextrow + merge->nrecv; 3811 for (k=0; k<merge->nrecv; k++){ 3812 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3813 nrows = *buf_ri_k[k]; 3814 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3815 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3816 } 3817 3818 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3819 len = 0; 3820 for (i=0;i<m;i++) { 3821 bnzi = 0; 3822 /* add local non-zero cols of this proc's seqmat into lnk */ 3823 arow = owners[rank] + i; 3824 anzi = ai[arow+1] - ai[arow]; 3825 aj = a->j + ai[arow]; 3826 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3827 bnzi += nlnk; 3828 /* add received col data into lnk */ 3829 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3830 if (i == *nextrow[k]) { /* i-th row */ 3831 anzi = *(nextai[k]+1) - *nextai[k]; 3832 aj = buf_rj[k] + *nextai[k]; 3833 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3834 bnzi += nlnk; 3835 nextrow[k]++; nextai[k]++; 3836 } 3837 } 3838 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3839 3840 /* if free space is not available, make more free space */ 3841 if (current_space->local_remaining<bnzi) { 3842 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3843 nspacedouble++; 3844 } 3845 /* copy data into free space, then initialize lnk */ 3846 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3847 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3848 3849 current_space->array += bnzi; 3850 current_space->local_used += bnzi; 3851 current_space->local_remaining -= bnzi; 3852 3853 bi[i+1] = bi[i] + bnzi; 3854 } 3855 3856 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3857 3858 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3859 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3860 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3861 3862 /* create symbolic parallel matrix B_mpi */ 3863 /*---------------------------------------*/ 3864 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3865 if (n==PETSC_DECIDE) { 3866 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3867 } else { 3868 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3869 } 3870 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3871 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3872 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3873 3874 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3875 B_mpi->assembled = PETSC_FALSE; 3876 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3877 merge->bi = bi; 3878 merge->bj = bj; 3879 merge->buf_ri = buf_ri; 3880 merge->buf_rj = buf_rj; 3881 merge->coi = PETSC_NULL; 3882 merge->coj = PETSC_NULL; 3883 merge->owners_co = PETSC_NULL; 3884 3885 /* attach the supporting struct to B_mpi for reuse */ 3886 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3887 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3888 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3889 *mpimat = B_mpi; 3890 3891 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3892 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3893 PetscFunctionReturn(0); 3894 } 3895 3896 static PetscEvent logkey_seqstompi = 0; 3897 #undef __FUNCT__ 3898 #define __FUNCT__ "MatMerge_SeqsToMPI" 3899 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3900 { 3901 PetscErrorCode ierr; 3902 3903 PetscFunctionBegin; 3904 if (!logkey_seqstompi) { 3905 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3906 } 3907 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3908 if (scall == MAT_INITIAL_MATRIX){ 3909 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3910 } 3911 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3912 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3913 PetscFunctionReturn(0); 3914 } 3915 static PetscEvent logkey_getlocalmat = 0; 3916 #undef __FUNCT__ 3917 #define __FUNCT__ "MatGetLocalMat" 3918 /*@C 3919 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3920 3921 Not Collective 3922 3923 Input Parameters: 3924 + A - the matrix 3925 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3926 3927 Output Parameter: 3928 . A_loc - the local sequential matrix generated 3929 3930 Level: developer 3931 3932 @*/ 3933 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3934 { 3935 PetscErrorCode ierr; 3936 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3937 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3938 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3939 PetscScalar *aa=a->a,*ba=b->a,*ca; 3940 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3941 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3942 3943 PetscFunctionBegin; 3944 if (!logkey_getlocalmat) { 3945 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3946 } 3947 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3948 if (scall == MAT_INITIAL_MATRIX){ 3949 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3950 ci[0] = 0; 3951 for (i=0; i<am; i++){ 3952 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3953 } 3954 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3955 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3956 k = 0; 3957 for (i=0; i<am; i++) { 3958 ncols_o = bi[i+1] - bi[i]; 3959 ncols_d = ai[i+1] - ai[i]; 3960 /* off-diagonal portion of A */ 3961 for (jo=0; jo<ncols_o; jo++) { 3962 col = cmap[*bj]; 3963 if (col >= cstart) break; 3964 cj[k] = col; bj++; 3965 ca[k++] = *ba++; 3966 } 3967 /* diagonal portion of A */ 3968 for (j=0; j<ncols_d; j++) { 3969 cj[k] = cstart + *aj++; 3970 ca[k++] = *aa++; 3971 } 3972 /* off-diagonal portion of A */ 3973 for (j=jo; j<ncols_o; j++) { 3974 cj[k] = cmap[*bj++]; 3975 ca[k++] = *ba++; 3976 } 3977 } 3978 /* put together the new matrix */ 3979 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3980 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3981 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3982 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3983 mat->free_a = PETSC_TRUE; 3984 mat->free_ij = PETSC_TRUE; 3985 mat->nonew = 0; 3986 } else if (scall == MAT_REUSE_MATRIX){ 3987 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3988 ci = mat->i; cj = mat->j; ca = mat->a; 3989 for (i=0; i<am; i++) { 3990 /* off-diagonal portion of A */ 3991 ncols_o = bi[i+1] - bi[i]; 3992 for (jo=0; jo<ncols_o; jo++) { 3993 col = cmap[*bj]; 3994 if (col >= cstart) break; 3995 *ca++ = *ba++; bj++; 3996 } 3997 /* diagonal portion of A */ 3998 ncols_d = ai[i+1] - ai[i]; 3999 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4000 /* off-diagonal portion of A */ 4001 for (j=jo; j<ncols_o; j++) { 4002 *ca++ = *ba++; bj++; 4003 } 4004 } 4005 } else { 4006 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4007 } 4008 4009 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4010 PetscFunctionReturn(0); 4011 } 4012 4013 static PetscEvent logkey_getlocalmatcondensed = 0; 4014 #undef __FUNCT__ 4015 #define __FUNCT__ "MatGetLocalMatCondensed" 4016 /*@C 4017 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4018 4019 Not Collective 4020 4021 Input Parameters: 4022 + A - the matrix 4023 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4024 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4025 4026 Output Parameter: 4027 . A_loc - the local sequential matrix generated 4028 4029 Level: developer 4030 4031 @*/ 4032 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4033 { 4034 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4035 PetscErrorCode ierr; 4036 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4037 IS isrowa,iscola; 4038 Mat *aloc; 4039 4040 PetscFunctionBegin; 4041 if (!logkey_getlocalmatcondensed) { 4042 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4043 } 4044 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4045 if (!row){ 4046 start = A->rmap.rstart; end = A->rmap.rend; 4047 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4048 } else { 4049 isrowa = *row; 4050 } 4051 if (!col){ 4052 start = A->cmap.rstart; 4053 cmap = a->garray; 4054 nzA = a->A->cmap.n; 4055 nzB = a->B->cmap.n; 4056 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4057 ncols = 0; 4058 for (i=0; i<nzB; i++) { 4059 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4060 else break; 4061 } 4062 imark = i; 4063 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4064 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4065 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4066 ierr = PetscFree(idx);CHKERRQ(ierr); 4067 } else { 4068 iscola = *col; 4069 } 4070 if (scall != MAT_INITIAL_MATRIX){ 4071 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4072 aloc[0] = *A_loc; 4073 } 4074 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4075 *A_loc = aloc[0]; 4076 ierr = PetscFree(aloc);CHKERRQ(ierr); 4077 if (!row){ 4078 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4079 } 4080 if (!col){ 4081 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4082 } 4083 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4084 PetscFunctionReturn(0); 4085 } 4086 4087 static PetscEvent logkey_GetBrowsOfAcols = 0; 4088 #undef __FUNCT__ 4089 #define __FUNCT__ "MatGetBrowsOfAcols" 4090 /*@C 4091 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4092 4093 Collective on Mat 4094 4095 Input Parameters: 4096 + A,B - the matrices in mpiaij format 4097 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4098 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4099 4100 Output Parameter: 4101 + rowb, colb - index sets of rows and columns of B to extract 4102 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4103 - B_seq - the sequential matrix generated 4104 4105 Level: developer 4106 4107 @*/ 4108 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4109 { 4110 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4111 PetscErrorCode ierr; 4112 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4113 IS isrowb,iscolb; 4114 Mat *bseq; 4115 4116 PetscFunctionBegin; 4117 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4118 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4119 } 4120 if (!logkey_GetBrowsOfAcols) { 4121 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4122 } 4123 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4124 4125 if (scall == MAT_INITIAL_MATRIX){ 4126 start = A->cmap.rstart; 4127 cmap = a->garray; 4128 nzA = a->A->cmap.n; 4129 nzB = a->B->cmap.n; 4130 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4131 ncols = 0; 4132 for (i=0; i<nzB; i++) { /* row < local row index */ 4133 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4134 else break; 4135 } 4136 imark = i; 4137 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4138 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4139 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4140 ierr = PetscFree(idx);CHKERRQ(ierr); 4141 *brstart = imark; 4142 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4143 } else { 4144 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4145 isrowb = *rowb; iscolb = *colb; 4146 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4147 bseq[0] = *B_seq; 4148 } 4149 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4150 *B_seq = bseq[0]; 4151 ierr = PetscFree(bseq);CHKERRQ(ierr); 4152 if (!rowb){ 4153 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4154 } else { 4155 *rowb = isrowb; 4156 } 4157 if (!colb){ 4158 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4159 } else { 4160 *colb = iscolb; 4161 } 4162 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4163 PetscFunctionReturn(0); 4164 } 4165 4166 static PetscEvent logkey_GetBrowsOfAocols = 0; 4167 #undef __FUNCT__ 4168 #define __FUNCT__ "MatGetBrowsOfAoCols" 4169 /*@C 4170 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4171 of the OFF-DIAGONAL portion of local A 4172 4173 Collective on Mat 4174 4175 Input Parameters: 4176 + A,B - the matrices in mpiaij format 4177 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4178 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4179 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4180 4181 Output Parameter: 4182 + B_oth - the sequential matrix generated 4183 4184 Level: developer 4185 4186 @*/ 4187 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4188 { 4189 VecScatter_MPI_General *gen_to,*gen_from; 4190 PetscErrorCode ierr; 4191 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4192 Mat_SeqAIJ *b_oth; 4193 VecScatter ctx=a->Mvctx; 4194 MPI_Comm comm=ctx->comm; 4195 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4196 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4197 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4198 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4199 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4200 MPI_Status *sstatus,rstatus; 4201 PetscInt *cols,sbs,rbs; 4202 PetscScalar *vals; 4203 4204 PetscFunctionBegin; 4205 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4206 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4207 } 4208 if (!logkey_GetBrowsOfAocols) { 4209 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4210 } 4211 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4212 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4213 4214 gen_to = (VecScatter_MPI_General*)ctx->todata; 4215 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4216 rvalues = gen_from->values; /* holds the length of receiving row */ 4217 svalues = gen_to->values; /* holds the length of sending row */ 4218 nrecvs = gen_from->n; 4219 nsends = gen_to->n; 4220 4221 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4222 srow = gen_to->indices; /* local row index to be sent */ 4223 sstarts = gen_to->starts; 4224 sprocs = gen_to->procs; 4225 sstatus = gen_to->sstatus; 4226 sbs = gen_to->bs; 4227 rstarts = gen_from->starts; 4228 rprocs = gen_from->procs; 4229 rbs = gen_from->bs; 4230 4231 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4232 if (scall == MAT_INITIAL_MATRIX){ 4233 /* i-array */ 4234 /*---------*/ 4235 /* post receives */ 4236 for (i=0; i<nrecvs; i++){ 4237 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4238 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4239 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4240 } 4241 4242 /* pack the outgoing message */ 4243 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4244 rstartsj = sstartsj + nsends +1; 4245 sstartsj[0] = 0; rstartsj[0] = 0; 4246 len = 0; /* total length of j or a array to be sent */ 4247 k = 0; 4248 for (i=0; i<nsends; i++){ 4249 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4250 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4251 for (j=0; j<nrows; j++) { 4252 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4253 for (l=0; l<sbs; l++){ 4254 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4255 rowlen[j*sbs+l] = ncols; 4256 len += ncols; 4257 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4258 } 4259 k++; 4260 } 4261 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4262 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4263 } 4264 /* recvs and sends of i-array are completed */ 4265 i = nrecvs; 4266 while (i--) { 4267 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4268 } 4269 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4270 4271 /* allocate buffers for sending j and a arrays */ 4272 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4273 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4274 4275 /* create i-array of B_oth */ 4276 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4277 b_othi[0] = 0; 4278 len = 0; /* total length of j or a array to be received */ 4279 k = 0; 4280 for (i=0; i<nrecvs; i++){ 4281 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4282 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4283 for (j=0; j<nrows; j++) { 4284 b_othi[k+1] = b_othi[k] + rowlen[j]; 4285 len += rowlen[j]; k++; 4286 } 4287 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4288 } 4289 4290 /* allocate space for j and a arrrays of B_oth */ 4291 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4292 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4293 4294 /* j-array */ 4295 /*---------*/ 4296 /* post receives of j-array */ 4297 for (i=0; i<nrecvs; i++){ 4298 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4299 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4300 } 4301 4302 /* pack the outgoing message j-array */ 4303 k = 0; 4304 for (i=0; i<nsends; i++){ 4305 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4306 bufJ = bufj+sstartsj[i]; 4307 for (j=0; j<nrows; j++) { 4308 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4309 for (ll=0; ll<sbs; ll++){ 4310 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4311 for (l=0; l<ncols; l++){ 4312 *bufJ++ = cols[l]; 4313 } 4314 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4315 } 4316 } 4317 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4318 } 4319 4320 /* recvs and sends of j-array are completed */ 4321 i = nrecvs; 4322 while (i--) { 4323 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4324 } 4325 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4326 } else if (scall == MAT_REUSE_MATRIX){ 4327 sstartsj = *startsj; 4328 rstartsj = sstartsj + nsends +1; 4329 bufa = *bufa_ptr; 4330 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4331 b_otha = b_oth->a; 4332 } else { 4333 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4334 } 4335 4336 /* a-array */ 4337 /*---------*/ 4338 /* post receives of a-array */ 4339 for (i=0; i<nrecvs; i++){ 4340 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4341 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4342 } 4343 4344 /* pack the outgoing message a-array */ 4345 k = 0; 4346 for (i=0; i<nsends; i++){ 4347 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4348 bufA = bufa+sstartsj[i]; 4349 for (j=0; j<nrows; j++) { 4350 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4351 for (ll=0; ll<sbs; ll++){ 4352 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4353 for (l=0; l<ncols; l++){ 4354 *bufA++ = vals[l]; 4355 } 4356 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4357 } 4358 } 4359 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4360 } 4361 /* recvs and sends of a-array are completed */ 4362 i = nrecvs; 4363 while (i--) { 4364 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4365 } 4366 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4367 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4368 4369 if (scall == MAT_INITIAL_MATRIX){ 4370 /* put together the new matrix */ 4371 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4372 4373 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4374 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4375 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4376 b_oth->free_a = PETSC_TRUE; 4377 b_oth->free_ij = PETSC_TRUE; 4378 b_oth->nonew = 0; 4379 4380 ierr = PetscFree(bufj);CHKERRQ(ierr); 4381 if (!startsj || !bufa_ptr){ 4382 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4383 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4384 } else { 4385 *startsj = sstartsj; 4386 *bufa_ptr = bufa; 4387 } 4388 } 4389 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4390 PetscFunctionReturn(0); 4391 } 4392 4393 #undef __FUNCT__ 4394 #define __FUNCT__ "MatGetCommunicationStructs" 4395 /*@C 4396 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4397 4398 Not Collective 4399 4400 Input Parameters: 4401 . A - The matrix in mpiaij format 4402 4403 Output Parameter: 4404 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4405 . colmap - A map from global column index to local index into lvec 4406 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4407 4408 Level: developer 4409 4410 @*/ 4411 #if defined (PETSC_USE_CTABLE) 4412 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4413 #else 4414 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4415 #endif 4416 { 4417 Mat_MPIAIJ *a; 4418 4419 PetscFunctionBegin; 4420 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4421 PetscValidPointer(lvec, 2) 4422 PetscValidPointer(colmap, 3) 4423 PetscValidPointer(multScatter, 4) 4424 a = (Mat_MPIAIJ *) A->data; 4425 if (lvec) *lvec = a->lvec; 4426 if (colmap) *colmap = a->colmap; 4427 if (multScatter) *multScatter = a->Mvctx; 4428 PetscFunctionReturn(0); 4429 } 4430 4431 EXTERN_C_BEGIN 4432 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4433 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4434 EXTERN_C_END 4435 4436 /*MC 4437 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4438 4439 Options Database Keys: 4440 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4441 4442 Level: beginner 4443 4444 .seealso: MatCreateMPIAIJ 4445 M*/ 4446 4447 EXTERN_C_BEGIN 4448 #undef __FUNCT__ 4449 #define __FUNCT__ "MatCreate_MPIAIJ" 4450 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4451 { 4452 Mat_MPIAIJ *b; 4453 PetscErrorCode ierr; 4454 PetscMPIInt size; 4455 4456 PetscFunctionBegin; 4457 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4458 4459 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4460 B->data = (void*)b; 4461 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4462 B->factor = 0; 4463 B->rmap.bs = 1; 4464 B->assembled = PETSC_FALSE; 4465 B->mapping = 0; 4466 4467 B->insertmode = NOT_SET_VALUES; 4468 b->size = size; 4469 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4470 4471 /* build cache for off array entries formed */ 4472 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4473 b->donotstash = PETSC_FALSE; 4474 b->colmap = 0; 4475 b->garray = 0; 4476 b->roworiented = PETSC_TRUE; 4477 4478 /* stuff used for matrix vector multiply */ 4479 b->lvec = PETSC_NULL; 4480 b->Mvctx = PETSC_NULL; 4481 4482 /* stuff for MatGetRow() */ 4483 b->rowindices = 0; 4484 b->rowvalues = 0; 4485 b->getrowactive = PETSC_FALSE; 4486 4487 4488 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4489 "MatStoreValues_MPIAIJ", 4490 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4492 "MatRetrieveValues_MPIAIJ", 4493 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4494 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4495 "MatGetDiagonalBlock_MPIAIJ", 4496 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4497 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4498 "MatIsTranspose_MPIAIJ", 4499 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4500 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4501 "MatMPIAIJSetPreallocation_MPIAIJ", 4502 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4503 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4504 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4505 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4506 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4507 "MatDiagonalScaleLocal_MPIAIJ", 4508 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4510 "MatConvert_MPIAIJ_MPICSRPERM", 4511 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4513 "MatConvert_MPIAIJ_MPICRL", 4514 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4515 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 EXTERN_C_END 4519 4520 #undef __FUNCT__ 4521 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4522 /*@C 4523 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4524 and "off-diagonal" part of the matrix in CSR format. 4525 4526 Collective on MPI_Comm 4527 4528 Input Parameters: 4529 + comm - MPI communicator 4530 . m - number of local rows (Cannot be PETSC_DECIDE) 4531 . n - This value should be the same as the local size used in creating the 4532 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4533 calculated if N is given) For square matrices n is almost always m. 4534 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4535 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4536 . i - row indices for "diagonal" portion of matrix 4537 . j - column indices 4538 . a - matrix values 4539 . oi - row indices for "off-diagonal" portion of matrix 4540 . oj - column indices 4541 - oa - matrix values 4542 4543 Output Parameter: 4544 . mat - the matrix 4545 4546 Level: advanced 4547 4548 Notes: 4549 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4550 4551 The i and j indices are 0 based 4552 4553 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4554 4555 4556 .keywords: matrix, aij, compressed row, sparse, parallel 4557 4558 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4559 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4560 @*/ 4561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4562 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4563 { 4564 PetscErrorCode ierr; 4565 Mat_MPIAIJ *maij; 4566 4567 PetscFunctionBegin; 4568 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4569 if (i[0]) { 4570 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4571 } 4572 if (oi[0]) { 4573 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4574 } 4575 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4576 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4577 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4578 maij = (Mat_MPIAIJ*) (*mat)->data; 4579 maij->donotstash = PETSC_TRUE; 4580 (*mat)->preallocated = PETSC_TRUE; 4581 4582 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4583 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4584 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4585 4586 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4587 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4588 4589 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4590 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4591 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4592 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4593 4594 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4595 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 PetscFunctionReturn(0); 4597 } 4598 4599 /* 4600 Special version for direct calls from Fortran 4601 */ 4602 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4603 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4604 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4605 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4606 #endif 4607 4608 /* Change these macros so can be used in void function */ 4609 #undef CHKERRQ 4610 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4611 #undef SETERRQ2 4612 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4613 #undef SETERRQ 4614 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4615 4616 EXTERN_C_BEGIN 4617 #undef __FUNCT__ 4618 #define __FUNCT__ "matsetvaluesmpiaij_" 4619 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4620 { 4621 Mat mat = *mmat; 4622 PetscInt m = *mm, n = *mn; 4623 InsertMode addv = *maddv; 4624 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4625 PetscScalar value; 4626 PetscErrorCode ierr; 4627 4628 MatPreallocated(mat); 4629 if (mat->insertmode == NOT_SET_VALUES) { 4630 mat->insertmode = addv; 4631 } 4632 #if defined(PETSC_USE_DEBUG) 4633 else if (mat->insertmode != addv) { 4634 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4635 } 4636 #endif 4637 { 4638 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4639 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4640 PetscTruth roworiented = aij->roworiented; 4641 4642 /* Some Variables required in the macro */ 4643 Mat A = aij->A; 4644 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4645 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4646 PetscScalar *aa = a->a; 4647 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4648 Mat B = aij->B; 4649 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4650 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4651 PetscScalar *ba = b->a; 4652 4653 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4654 PetscInt nonew = a->nonew; 4655 PetscScalar *ap1,*ap2; 4656 4657 PetscFunctionBegin; 4658 for (i=0; i<m; i++) { 4659 if (im[i] < 0) continue; 4660 #if defined(PETSC_USE_DEBUG) 4661 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4662 #endif 4663 if (im[i] >= rstart && im[i] < rend) { 4664 row = im[i] - rstart; 4665 lastcol1 = -1; 4666 rp1 = aj + ai[row]; 4667 ap1 = aa + ai[row]; 4668 rmax1 = aimax[row]; 4669 nrow1 = ailen[row]; 4670 low1 = 0; 4671 high1 = nrow1; 4672 lastcol2 = -1; 4673 rp2 = bj + bi[row]; 4674 ap2 = ba + bi[row]; 4675 rmax2 = bimax[row]; 4676 nrow2 = bilen[row]; 4677 low2 = 0; 4678 high2 = nrow2; 4679 4680 for (j=0; j<n; j++) { 4681 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4682 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4683 if (in[j] >= cstart && in[j] < cend){ 4684 col = in[j] - cstart; 4685 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4686 } else if (in[j] < 0) continue; 4687 #if defined(PETSC_USE_DEBUG) 4688 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4689 #endif 4690 else { 4691 if (mat->was_assembled) { 4692 if (!aij->colmap) { 4693 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4694 } 4695 #if defined (PETSC_USE_CTABLE) 4696 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4697 col--; 4698 #else 4699 col = aij->colmap[in[j]] - 1; 4700 #endif 4701 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4702 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4703 col = in[j]; 4704 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4705 B = aij->B; 4706 b = (Mat_SeqAIJ*)B->data; 4707 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4708 rp2 = bj + bi[row]; 4709 ap2 = ba + bi[row]; 4710 rmax2 = bimax[row]; 4711 nrow2 = bilen[row]; 4712 low2 = 0; 4713 high2 = nrow2; 4714 bm = aij->B->rmap.n; 4715 ba = b->a; 4716 } 4717 } else col = in[j]; 4718 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4719 } 4720 } 4721 } else { 4722 if (!aij->donotstash) { 4723 if (roworiented) { 4724 if (ignorezeroentries && v[i*n] == 0.0) continue; 4725 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4726 } else { 4727 if (ignorezeroentries && v[i] == 0.0) continue; 4728 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4729 } 4730 } 4731 } 4732 }} 4733 PetscFunctionReturnVoid(); 4734 } 4735 EXTERN_C_END 4736 4737