#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
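/*
   Illustrative sketch (added commentary, not part of PETSc): the two macros above insert
   (col,value) into a row kept sorted by column.  A bounded binary search narrows the range,
   a short linear scan finds the slot, later entries are shifted up one place, and an
   ADD_VALUES insert accumulates instead of overwriting.  The helper below shows the same
   idea on plain arrays; the name and signature are hypothetical and it is not compiled.
*/
#if 0
static void InsertSortedEntry(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar value,PetscTruth add)
{
  PetscInt low = 0,high = *nrow,t,i;
  while (high-low > 5) {                       /* bounded binary search */
    t = (low+high)/2;
    if (cols[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;                  /* first column larger than col: insert here */
    if (cols[i] == col) {                      /* already present: add or overwrite */
      if (add) vals[i] += value; else vals[i] = value;
      return;
    }
  }
  for (t=*nrow; t>i; t--) {                    /* shift later entries up one slot */
    cols[t] = cols[t-1]; vals[t] = vals[t-1];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;   /* assumes the caller guaranteed space */
}
#endif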
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
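/*
   Added commentary: MatSetValuesRow_MPIAIJ() above assumes v[] holds the complete nonzero
   row in increasing global column order, so it is copied in three pieces: the off-diagonal
   entries to the left of this process's diagonal block (into B), the diagonal-block entries
   (into A), then the remaining off-diagonal entries (into B).
*/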
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
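/*
   Usage sketch (illustrative only, kept out of compilation): how the two routines above are
   reached from user code.  Values for locally owned rows go directly into the diagonal (A)
   or off-diagonal (B) block; values for rows owned by other processes are stashed and
   communicated during assembly.  The sizes and entries below are made up.
*/
#if 0
static PetscErrorCode ExampleSetAndGet(MPI_Comm comm)
{
  Mat            M;
  PetscInt       row,cols[2];
  PetscScalar    vals[2] = {2.0,-1.0},check[1];
  PetscErrorCode ierr;

  ierr = MatCreate(comm,&M);CHKERRQ(ierr);
  ierr = MatSetSizes(M,10,10,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(M,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(M,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&row,PETSC_NULL);CHKERRQ(ierr);
  cols[0] = row; cols[1] = row+1;
  ierr = MatSetValues(M,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); /* MatSetValues_MPIAIJ() */
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetValues(M,1,&row,1,cols,check);CHKERRQ(ierr);              /* MatGetValues_MPIAIJ() */
  ierr = MatDestroy(M);CHKERRQ(ierr);
  return 0;
}
#endif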
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
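/*
   Assembly sketch (illustrative only, assumes a Mat M and PetscErrorCode ierr in scope):
   MatAssemblyBegin_MPIAIJ() above ships stashed off-process entries while
   MatAssemblyEnd_MPIAIJ() drains them with MatStashScatterGetMesg_Private() and then
   assembles the A and B blocks.  A code that only sets locally owned rows can skip the
   stash entirely:
*/
#if 0
  ierr = MatSetOption(M,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr); /* sets aij->donotstash */
  /* ... MatSetValues() with locally owned rows only ... */
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif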
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, cause many mallocs, and slow things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
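/*
   Usage sketch (illustrative only, assumes a Mat M and PetscErrorCode ierr in scope):
   MatZeroRows_MPIAIJ() above accepts global row numbers owned by any process; the
   all-to-all communication routes each requested row to its owner before the local
   blocks are zeroed.  Typical use is imposing Dirichlet boundary conditions:
*/
#if 0
  PetscInt bcrows[2] = {0,5};                       /* global rows, possibly off-process */
  ierr = MatZeroRows(M,2,bcrows,1.0);CHKERRQ(ierr); /* zero the rows, put 1.0 on the diagonal */
#endif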
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
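/*
   Added commentary: the three products above share one pattern for the distributed matrix:
   start the scatter of the needed off-process entries of x into lvec, multiply by the local
   diagonal block A while those messages are in flight, finish the scatter, then accumulate
   the off-diagonal contribution B*lvec.  Per process this computes
   y_local = A*x_local + B*x_ghost.
*/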
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
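/*
   Added commentary: MatIsTranspose_MPIAIJ() above first compares the diagonal blocks, which
   is purely local and cheap, and only if they match gathers the complementary off-diagonal
   pieces with MatGetSubMatrices() -- Amat(me,notme) against Bmat(notme,me) -- so that entries
   whose transposed counterparts live on other processes can be compared.
*/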
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
               rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
               rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
  ierr = ISSetPermutation(growp); CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp); CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISDestroy(irowp); CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
    ierr = ISDestroy(lcolp); CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp); CHKERRQ(ierr);
  ierr = ISDestroy(icolp); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
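/*
   Usage sketch (illustrative only, assumes a Mat M and PetscErrorCode ierr in scope):
   MatGetRow()/MatRestoreRow() on an MPIAIJ matrix may only be called for locally owned
   rows; the routines above merge the A and B blocks of one row into the rowvalues and
   rowindices work arrays in global column order.
*/
#if 0
  PetscInt    rstart,rend,row,ncols,*cols;
  PetscScalar *vals;
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(M,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect ncols, cols[], vals[] ... */
    ierr = MatRestoreRow(M,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
#endif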
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
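/*
   Added commentary on MatNorm_MPIAIJ() above: the Frobenius norm is the square root of the
   globally summed squares of the entries (local sums of squares followed by MPI_Allreduce);
   the 1-norm is the maximum over global columns of the column sums of |a_ij|, accumulated
   into a length-N work array with off-diagonal entries mapped back to global columns through
   garray; the infinity norm is the maximum over local rows of the row sums, reduced with
   MPI_MAX.  The true 2-norm would require an eigenvalue computation and is not supported.
*/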
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
  PetscInt       cstart=A->cmap.rstart,ncol;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
  ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  cols = d_nnz + na + 1;  /* work space to be used by B part */
  ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ai[ma]; i++){
    d_nnz[aj[i]]++;
    aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap.rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  array = Bloc->a;
  row   = A->rmap.rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols += ncol;
  }
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
*/ 1582 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1583 /* because of the column compression in the off-processor part of the matrix a->B, 1584 the number of columns in a->B and b->B may be different, hence we cannot call 1585 the MatCopy() directly on the two parts. If need be, we can provide a more 1586 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1587 then copying the submatrices */ 1588 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1589 } else { 1590 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1591 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1592 } 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1598 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1599 { 1600 PetscErrorCode ierr; 1601 1602 PetscFunctionBegin; 1603 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1604 PetscFunctionReturn(0); 1605 } 1606 1607 #include "petscblaslapack.h" 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatAXPY_MPIAIJ" 1610 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1611 { 1612 PetscErrorCode ierr; 1613 PetscInt i; 1614 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1615 PetscBLASInt bnz,one=1; 1616 Mat_SeqAIJ *x,*y; 1617 1618 PetscFunctionBegin; 1619 if (str == SAME_NONZERO_PATTERN) { 1620 PetscScalar alpha = a; 1621 x = (Mat_SeqAIJ *)xx->A->data; 1622 y = (Mat_SeqAIJ *)yy->A->data; 1623 bnz = (PetscBLASInt)x->nz; 1624 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1625 x = (Mat_SeqAIJ *)xx->B->data; 1626 y = (Mat_SeqAIJ *)yy->B->data; 1627 bnz = (PetscBLASInt)x->nz; 1628 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1629 } else if (str == SUBSET_NONZERO_PATTERN) { 1630 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1631 1632 x = (Mat_SeqAIJ *)xx->B->data; 1633 y = (Mat_SeqAIJ *)yy->B->data; 1634 if (y->xtoy && y->XtoY != xx->B) { 1635 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1636 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1637 } 1638 if (!y->xtoy) { /* get xtoy */ 1639 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1640 y->XtoY = xx->B; 1641 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1642 } 1643 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1644 } else { 1645 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatConjugate_MPIAIJ" 1654 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1655 { 1656 #if defined(PETSC_USE_COMPLEX) 1657 PetscErrorCode ierr; 1658 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1659 1660 PetscFunctionBegin; 1661 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1662 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1663 #else 1664 PetscFunctionBegin; 1665 #endif 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatRealPart_MPIAIJ" 1671 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1672 { 1673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1674 PetscErrorCode ierr; 1675 1676 PetscFunctionBegin; 1677 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1678 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1684 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1685 { 1686 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1691 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #ifdef PETSC_HAVE_PBGL 1696 1697 #include <boost/parallel/mpi/bsp_process_group.hpp> 1698 #include <boost/graph/distributed/ilu_default_graph.hpp> 1699 #include <boost/graph/distributed/ilu_0_block.hpp> 1700 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1701 #include <boost/graph/distributed/petsc/interface.hpp> 1702 #include <boost/multi_array.hpp> 1703 #include <boost/parallel/distributed_property_map.hpp> 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1707 /* 1708 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1709 */ 1710 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1711 { 1712 namespace petsc = boost::distributed::petsc; 1713 1714 namespace graph_dist = boost::graph::distributed; 1715 using boost::graph::distributed::ilu_default::process_group_type; 1716 using boost::graph::ilu_permuted; 1717 1718 PetscTruth row_identity, col_identity; 1719 PetscContainer c; 1720 PetscInt m, n, M, N; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1725 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1726 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1727 if (!row_identity || !col_identity) { 1728 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1729 } 1730 1731 process_group_type pg; 1732 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1733 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1734 lgraph_type& level_graph = *lgraph_p; 1735 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1736 1737 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1738 ilu_permuted(level_graph); 1739 1740 /* put together the new matrix */ 1741 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1742 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1743 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1744 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1745 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1746 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1747 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1748 (*fact)->factor = FACTOR_LU; 1749 1750 ierr = PetscContainerCreate(A->comm, &c); 1751 ierr = PetscContainerSetPointer(c, lgraph_p); 1752 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1758 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1759 { 1760 PetscFunctionBegin; 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatSolve_MPIAIJ" 1766 /* 1767 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1768 */ 1769 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1770 { 1771 namespace graph_dist = boost::graph::distributed; 1772 1773 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1774 lgraph_type* lgraph_p; 1775 PetscContainer c; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 ierr = 
PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1780 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1781 ierr = VecCopy(b, x); CHKERRQ(ierr); 1782 1783 PetscScalar* array_x; 1784 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1785 PetscInt sx; 1786 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1787 1788 PetscScalar* array_b; 1789 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1790 PetscInt sb; 1791 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1792 1793 lgraph_type& level_graph = *lgraph_p; 1794 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1795 1796 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1797 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1798 ref_x(array_x, boost::extents[num_vertices(graph)]); 1799 1800 typedef boost::iterator_property_map<array_ref_type::iterator, 1801 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1802 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1803 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1804 1805 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1806 1807 PetscFunctionReturn(0); 1808 } 1809 #endif 1810 1811 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1812 PetscInt nzlocal,nsends,nrecvs; 1813 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1814 PetscScalar *sbuf_a,**rbuf_a; 1815 PetscErrorCode (*MatDestroy)(Mat); 1816 } Mat_Redundant; 1817 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1820 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1821 { 1822 PetscErrorCode ierr; 1823 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1824 PetscInt i; 1825 1826 PetscFunctionBegin; 1827 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1828 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1829 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1830 for (i=0; i<redund->nrecvs; i++){ 1831 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1832 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1833 } 1834 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1835 ierr = PetscFree(redund);CHKERRQ(ierr); 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatDestroy_MatRedundant" 1841 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1842 { 1843 PetscErrorCode ierr; 1844 PetscContainer container; 1845 Mat_Redundant *redund=PETSC_NULL; 1846 1847 PetscFunctionBegin; 1848 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1849 if (container) { 1850 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1851 } else { 1852 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1853 } 1854 A->ops->destroy = redund->MatDestroy; 1855 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1856 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1857 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1863 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1864 { 1865 PetscMPIInt rank,size; 1866 MPI_Comm comm=mat->comm; 1867 PetscErrorCode ierr; 1868 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1869 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1870 PetscInt 
*rowrange=mat->rmap.range; 1871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1872 Mat A=aij->A,B=aij->B,C=*matredundant; 1873 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1874 PetscScalar *sbuf_a; 1875 PetscInt nzlocal=a->nz+b->nz; 1876 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1877 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1878 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1879 PetscScalar *vals,*aworkA,*aworkB; 1880 PetscMPIInt tag1,tag2,tag3,imdex; 1881 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1882 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1883 MPI_Status recv_status,*send_status; 1884 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1885 PetscInt **rbuf_j=PETSC_NULL; 1886 PetscScalar **rbuf_a=PETSC_NULL; 1887 Mat_Redundant *redund=PETSC_NULL; 1888 PetscContainer container; 1889 1890 PetscFunctionBegin; 1891 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1892 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1893 1894 if (reuse == MAT_REUSE_MATRIX) { 1895 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1896 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1897 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1898 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1899 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1900 if (container) { 1901 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1902 } else { 1903 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 1904 } 1905 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 1906 1907 nsends = redund->nsends; 1908 nrecvs = redund->nrecvs; 1909 send_rank = redund->send_rank; recv_rank = send_rank + size; 1910 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1911 sbuf_j = redund->sbuf_j; 1912 sbuf_a = redund->sbuf_a; 1913 rbuf_j = redund->rbuf_j; 1914 rbuf_a = redund->rbuf_a; 1915 } 1916 1917 if (reuse == MAT_INITIAL_MATRIX){ 1918 PetscMPIInt subrank,subsize; 1919 PetscInt nleftover,np_subcomm; 1920 /* get the destination processors' id send_rank, nsends and nrecvs */ 1921 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1922 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1923 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1924 recv_rank = send_rank + size; 1925 np_subcomm = size/nsubcomm; 1926 nleftover = size - nsubcomm*np_subcomm; 1927 nsends = 0; nrecvs = 0; 1928 for (i=0; i<size; i++){ /* i=rank*/ 1929 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1930 send_rank[nsends] = i; nsends++; 1931 recv_rank[nrecvs++] = i; 1932 } 1933 } 1934 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1935 i = size-nleftover-1; 1936 j = 0; 1937 while (j < nsubcomm - nleftover){ 1938 send_rank[nsends++] = i; 1939 i--; j++; 1940 } 1941 } 1942 1943 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1944 for (i=0; i<nleftover; i++){ 1945 recv_rank[nrecvs++] = size-nleftover+i; 1946 } 1947 } 1948 1949 /* allocate sbuf_j, sbuf_a */ 1950 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1951 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1952 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1953 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1954 1955 /* copy mat's local entries into the buffers */ 1956 if (reuse == MAT_INITIAL_MATRIX){ 1957 rownz_max = 0; 1958 rptr = sbuf_j; 1959 cols = sbuf_j + rend-rstart + 1; 1960 vals = sbuf_a; 1961 rptr[0] = 0; 1962 for (i=0; i<rend-rstart; i++){ 1963 row = i + rstart; 1964 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1965 ncols = nzA + nzB; 1966 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1967 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1968 /* load the column indices for this row into cols */ 1969 lwrite = 0; 1970 for (l=0; l<nzB; l++) { 1971 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1972 vals[lwrite] = aworkB[l]; 1973 cols[lwrite++] = ctmp; 1974 } 1975 } 1976 for (l=0; l<nzA; l++){ 1977 vals[lwrite] = aworkA[l]; 1978 cols[lwrite++] = cstart + cworkA[l]; 1979 } 1980 for (l=0; l<nzB; l++) { 1981 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1982 vals[lwrite] = aworkB[l]; 1983 cols[lwrite++] = ctmp; 1984 } 1985 } 1986 vals += ncols; 1987 cols += ncols; 1988 rptr[i+1] = rptr[i] + ncols; 1989 if (rownz_max < ncols) rownz_max = ncols; 1990 } 1991 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1992 } else { /* only copy matrix values into sbuf_a */ 1993 rptr = sbuf_j; 1994 vals = sbuf_a; 1995 rptr[0] = 0; 1996 for (i=0; i<rend-rstart; i++){ 1997 row = i + rstart; 1998 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1999 ncols = nzA + nzB; 2000 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2001 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2002 lwrite = 0; 2003 for (l=0; l<nzB; l++) { 2004 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2005 } 2006 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2007 for (l=0; l<nzB; l++) { 
2008 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2009 } 2010 vals += ncols; 2011 rptr[i+1] = rptr[i] + ncols; 2012 } 2013 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2014 2015 /* send nzlocal to others, and recv other's nzlocal */ 2016 /*--------------------------------------------------*/ 2017 if (reuse == MAT_INITIAL_MATRIX){ 2018 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2019 s_waits2 = s_waits3 + nsends; 2020 s_waits1 = s_waits2 + nsends; 2021 r_waits1 = s_waits1 + nsends; 2022 r_waits2 = r_waits1 + nrecvs; 2023 r_waits3 = r_waits2 + nrecvs; 2024 } else { 2025 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2026 r_waits3 = s_waits3 + nsends; 2027 } 2028 2029 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2030 if (reuse == MAT_INITIAL_MATRIX){ 2031 /* get new tags to keep the communication clean */ 2032 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2033 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2034 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2035 rbuf_nz = sbuf_nz + nsends; 2036 2037 /* post receives of other's nzlocal */ 2038 for (i=0; i<nrecvs; i++){ 2039 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2040 } 2041 /* send nzlocal to others */ 2042 for (i=0; i<nsends; i++){ 2043 sbuf_nz[i] = nzlocal; 2044 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2045 } 2046 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2047 count = nrecvs; 2048 while (count) { 2049 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2050 recv_rank[imdex] = recv_status.MPI_SOURCE; 2051 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2052 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2053 2054 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2055 rbuf_nz[imdex] += i + 2; 2056 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2057 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2058 count--; 2059 } 2060 /* wait on sends of nzlocal */ 2061 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2062 /* send mat->i,j to others, and recv from other's */ 2063 /*------------------------------------------------*/ 2064 for (i=0; i<nsends; i++){ 2065 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2066 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2067 } 2068 /* wait on receives of mat->i,j */ 2069 /*------------------------------*/ 2070 count = nrecvs; 2071 while (count) { 2072 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2073 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2074 count--; 2075 } 2076 /* wait on sends of mat->i,j */ 2077 /*---------------------------*/ 2078 if (nsends) { 2079 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2080 } 2081 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2082 2083 /* post receives, send and receive mat->a */ 2084 /*----------------------------------------*/ 
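 /* Final phase of the exchange: only the numerical values move here (tag3). The receive buffers and
    counts were set up either earlier in this call (MAT_INITIAL_MATRIX: tag1 carried each sender's
    nzlocal, tag2 carried sbuf_j, i.e. the row pointers plus column indices) or were cached in the
    Mat_Redundant container by a previous call (MAT_REUSE_MATRIX). */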
2085 for (imdex=0; imdex<nrecvs; imdex++) { 2086 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2087 } 2088 for (i=0; i<nsends; i++){ 2089 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2090 } 2091 count = nrecvs; 2092 while (count) { 2093 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2094 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2095 count--; 2096 } 2097 if (nsends) { 2098 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2099 } 2100 2101 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2102 2103 /* create redundant matrix */ 2104 /*-------------------------*/ 2105 if (reuse == MAT_INITIAL_MATRIX){ 2106 /* compute rownz_max for preallocation */ 2107 for (imdex=0; imdex<nrecvs; imdex++){ 2108 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2109 rptr = rbuf_j[imdex]; 2110 for (i=0; i<j; i++){ 2111 ncols = rptr[i+1] - rptr[i]; 2112 if (rownz_max < ncols) rownz_max = ncols; 2113 } 2114 } 2115 2116 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2117 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2118 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2119 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2120 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2121 } else { 2122 C = *matredundant; 2123 } 2124 2125 /* insert local matrix entries */ 2126 rptr = sbuf_j; 2127 cols = sbuf_j + rend-rstart + 1; 2128 vals = sbuf_a; 2129 for (i=0; i<rend-rstart; i++){ 2130 row = i + rstart; 2131 ncols = rptr[i+1] - rptr[i]; 2132 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2133 vals += ncols; 2134 cols += ncols; 2135 } 2136 /* insert received matrix entries */ 2137 for (imdex=0; imdex<nrecvs; imdex++){ 2138 rstart = rowrange[recv_rank[imdex]]; 2139 rend = rowrange[recv_rank[imdex]+1]; 2140 rptr = rbuf_j[imdex]; 2141 cols = rbuf_j[imdex] + rend-rstart + 1; 2142 vals = rbuf_a[imdex]; 2143 for (i=0; i<rend-rstart; i++){ 2144 row = i + rstart; 2145 ncols = rptr[i+1] - rptr[i]; 2146 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2147 vals += ncols; 2148 cols += ncols; 2149 } 2150 } 2151 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2152 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2153 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2154 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2155 if (reuse == MAT_INITIAL_MATRIX){ 2156 PetscContainer container; 2157 *matredundant = C; 2158 /* create a supporting struct and attach it to C for reuse */ 2159 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 2160 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2161 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2162 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2163 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2164 2165 redund->nzlocal = nzlocal; 2166 redund->nsends = nsends; 2167 redund->nrecvs = nrecvs; 2168 redund->send_rank = send_rank; 2169 redund->sbuf_nz = sbuf_nz; 2170 redund->sbuf_j = sbuf_j; 2171 redund->sbuf_a = sbuf_a; 2172 redund->rbuf_j = rbuf_j; 
2173 redund->rbuf_a = rbuf_a; 2174 2175 redund->MatDestroy = C->ops->destroy; 2176 C->ops->destroy = MatDestroy_MatRedundant; 2177 } 2178 PetscFunctionReturn(0); 2179 } 2180 2181 /* -------------------------------------------------------------------*/ 2182 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2183 MatGetRow_MPIAIJ, 2184 MatRestoreRow_MPIAIJ, 2185 MatMult_MPIAIJ, 2186 /* 4*/ MatMultAdd_MPIAIJ, 2187 MatMultTranspose_MPIAIJ, 2188 MatMultTransposeAdd_MPIAIJ, 2189 #ifdef PETSC_HAVE_PBGL 2190 MatSolve_MPIAIJ, 2191 #else 2192 0, 2193 #endif 2194 0, 2195 0, 2196 /*10*/ 0, 2197 0, 2198 0, 2199 MatRelax_MPIAIJ, 2200 MatTranspose_MPIAIJ, 2201 /*15*/ MatGetInfo_MPIAIJ, 2202 MatEqual_MPIAIJ, 2203 MatGetDiagonal_MPIAIJ, 2204 MatDiagonalScale_MPIAIJ, 2205 MatNorm_MPIAIJ, 2206 /*20*/ MatAssemblyBegin_MPIAIJ, 2207 MatAssemblyEnd_MPIAIJ, 2208 0, 2209 MatSetOption_MPIAIJ, 2210 MatZeroEntries_MPIAIJ, 2211 /*25*/ MatZeroRows_MPIAIJ, 2212 0, 2213 #ifdef PETSC_HAVE_PBGL 2214 MatLUFactorNumeric_MPIAIJ, 2215 #else 2216 0, 2217 #endif 2218 0, 2219 0, 2220 /*30*/ MatSetUpPreallocation_MPIAIJ, 2221 #ifdef PETSC_HAVE_PBGL 2222 MatILUFactorSymbolic_MPIAIJ, 2223 #else 2224 0, 2225 #endif 2226 0, 2227 0, 2228 0, 2229 /*35*/ MatDuplicate_MPIAIJ, 2230 0, 2231 0, 2232 0, 2233 0, 2234 /*40*/ MatAXPY_MPIAIJ, 2235 MatGetSubMatrices_MPIAIJ, 2236 MatIncreaseOverlap_MPIAIJ, 2237 MatGetValues_MPIAIJ, 2238 MatCopy_MPIAIJ, 2239 /*45*/ 0, 2240 MatScale_MPIAIJ, 2241 0, 2242 0, 2243 0, 2244 /*50*/ MatSetBlockSize_MPIAIJ, 2245 0, 2246 0, 2247 0, 2248 0, 2249 /*55*/ MatFDColoringCreate_MPIAIJ, 2250 0, 2251 MatSetUnfactored_MPIAIJ, 2252 MatPermute_MPIAIJ, 2253 0, 2254 /*60*/ MatGetSubMatrix_MPIAIJ, 2255 MatDestroy_MPIAIJ, 2256 MatView_MPIAIJ, 2257 0, 2258 0, 2259 /*65*/ 0, 2260 0, 2261 0, 2262 0, 2263 0, 2264 /*70*/ 0, 2265 0, 2266 MatSetColoring_MPIAIJ, 2267 #if defined(PETSC_HAVE_ADIC) 2268 MatSetValuesAdic_MPIAIJ, 2269 #else 2270 0, 2271 #endif 2272 MatSetValuesAdifor_MPIAIJ, 2273 /*75*/ 0, 2274 0, 2275 0, 2276 0, 2277 0, 2278 /*80*/ 0, 2279 0, 2280 0, 2281 0, 2282 /*84*/ MatLoad_MPIAIJ, 2283 0, 2284 0, 2285 0, 2286 0, 2287 0, 2288 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2289 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2290 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2291 MatPtAP_Basic, 2292 MatPtAPSymbolic_MPIAIJ, 2293 /*95*/ MatPtAPNumeric_MPIAIJ, 2294 0, 2295 0, 2296 0, 2297 0, 2298 /*100*/0, 2299 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2300 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2301 MatConjugate_MPIAIJ, 2302 0, 2303 /*105*/MatSetValuesRow_MPIAIJ, 2304 MatRealPart_MPIAIJ, 2305 MatImaginaryPart_MPIAIJ, 2306 0, 2307 0, 2308 /*110*/0, 2309 MatGetRedundantMatrix_MPIAIJ}; 2310 2311 /* ----------------------------------------------------------------------------------------*/ 2312 2313 EXTERN_C_BEGIN 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2316 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2317 { 2318 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2319 PetscErrorCode ierr; 2320 2321 PetscFunctionBegin; 2322 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2323 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 EXTERN_C_END 2327 2328 EXTERN_C_BEGIN 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2331 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2332 { 2333 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2338 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 EXTERN_C_END 2342 2343 #include "petscpc.h" 2344 EXTERN_C_BEGIN 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2347 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2348 { 2349 Mat_MPIAIJ *b; 2350 PetscErrorCode ierr; 2351 PetscInt i; 2352 2353 PetscFunctionBegin; 2354 B->preallocated = PETSC_TRUE; 2355 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2356 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2357 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2358 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2359 2360 B->rmap.bs = B->cmap.bs = 1; 2361 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2362 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2363 if (d_nnz) { 2364 for (i=0; i<B->rmap.n; i++) { 2365 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2366 } 2367 } 2368 if (o_nnz) { 2369 for (i=0; i<B->rmap.n; i++) { 2370 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2371 } 2372 } 2373 b = (Mat_MPIAIJ*)B->data; 2374 2375 /* Explicitly create 2 MATSEQAIJ matrices. */ 2376 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2377 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2378 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2379 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2380 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2381 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2382 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2383 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2384 2385 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2386 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2387 2388 PetscFunctionReturn(0); 2389 } 2390 EXTERN_C_END 2391 2392 #undef __FUNCT__ 2393 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2394 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2395 { 2396 Mat mat; 2397 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 *newmat = 0; 2402 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2403 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2404 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2405 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2406 a = (Mat_MPIAIJ*)mat->data; 2407 2408 mat->factor = matin->factor; 2409 mat->rmap.bs = matin->rmap.bs; 2410 mat->assembled = PETSC_TRUE; 2411 mat->insertmode = NOT_SET_VALUES; 2412 mat->preallocated = PETSC_TRUE; 2413 2414 a->size = oldmat->size; 2415 a->rank = oldmat->rank; 2416 a->donotstash = oldmat->donotstash; 2417 a->roworiented = oldmat->roworiented; 2418 a->rowindices = 0; 2419 a->rowvalues = 0; 2420 a->getrowactive = PETSC_FALSE; 2421 2422 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2423 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2424 2425 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2426 if (oldmat->colmap) { 2427 #if defined (PETSC_USE_CTABLE) 2428 ierr = 
PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2429 #else 2430 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2431 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2432 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2433 #endif 2434 } else a->colmap = 0; 2435 if (oldmat->garray) { 2436 PetscInt len; 2437 len = oldmat->B->cmap.n; 2438 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2439 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2440 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2441 } else a->garray = 0; 2442 2443 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2444 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2445 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2446 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2447 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2448 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2449 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2450 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2451 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2452 *newmat = mat; 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #include "petscsys.h" 2457 2458 #undef __FUNCT__ 2459 #define __FUNCT__ "MatLoad_MPIAIJ" 2460 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2461 { 2462 Mat A; 2463 PetscScalar *vals,*svals; 2464 MPI_Comm comm = ((PetscObject)viewer)->comm; 2465 MPI_Status status; 2466 PetscErrorCode ierr; 2467 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2468 PetscInt i,nz,j,rstart,rend,mmax; 2469 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2470 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2471 PetscInt cend,cstart,n,*rowners; 2472 int fd; 2473 2474 PetscFunctionBegin; 2475 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2476 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2477 if (!rank) { 2478 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2479 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2480 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2481 } 2482 2483 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2484 M = header[1]; N = header[2]; 2485 /* determine ownership of all rows */ 2486 m = M/size + ((M % size) > rank); 2487 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2488 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2489 2490 /* First process needs enough room for process with most rows */ 2491 if (!rank) { 2492 mmax = rowners[1]; 2493 for (i=2; i<size; i++) { 2494 mmax = PetscMax(mmax,rowners[i]); 2495 } 2496 } else mmax = m; 2497 2498 rowners[0] = 0; 2499 for (i=2; i<=size; i++) { 2500 rowners[i] += rowners[i-1]; 2501 } 2502 rstart = rowners[rank]; 2503 rend = rowners[rank+1]; 2504 2505 /* distribute row lengths to all processors */ 2506 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2507 if (!rank) { 2508 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2509 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2510 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2511 ierr = 
PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2512 for (j=0; j<m; j++) { 2513 procsnz[0] += ourlens[j]; 2514 } 2515 for (i=1; i<size; i++) { 2516 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2517 /* calculate the number of nonzeros on each processor */ 2518 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2519 procsnz[i] += rowlengths[j]; 2520 } 2521 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2522 } 2523 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2524 } else { 2525 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2526 } 2527 2528 if (!rank) { 2529 /* determine max buffer needed and allocate it */ 2530 maxnz = 0; 2531 for (i=0; i<size; i++) { 2532 maxnz = PetscMax(maxnz,procsnz[i]); 2533 } 2534 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2535 2536 /* read in my part of the matrix column indices */ 2537 nz = procsnz[0]; 2538 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2539 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2540 2541 /* read in every one elses and ship off */ 2542 for (i=1; i<size; i++) { 2543 nz = procsnz[i]; 2544 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2545 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2546 } 2547 ierr = PetscFree(cols);CHKERRQ(ierr); 2548 } else { 2549 /* determine buffer space needed for message */ 2550 nz = 0; 2551 for (i=0; i<m; i++) { 2552 nz += ourlens[i]; 2553 } 2554 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2555 2556 /* receive message of column indices*/ 2557 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2558 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2559 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2560 } 2561 2562 /* determine column ownership if matrix is not square */ 2563 if (N != M) { 2564 n = N/size + ((N % size) > rank); 2565 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2566 cstart = cend - n; 2567 } else { 2568 cstart = rstart; 2569 cend = rend; 2570 n = cend - cstart; 2571 } 2572 2573 /* loop over local rows, determining number of off diagonal entries */ 2574 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2575 jj = 0; 2576 for (i=0; i<m; i++) { 2577 for (j=0; j<ourlens[i]; j++) { 2578 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2579 jj++; 2580 } 2581 } 2582 2583 /* create our matrix */ 2584 for (i=0; i<m; i++) { 2585 ourlens[i] -= offlens[i]; 2586 } 2587 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2588 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2589 ierr = MatSetType(A,type);CHKERRQ(ierr); 2590 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2591 2592 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2593 for (i=0; i<m; i++) { 2594 ourlens[i] += offlens[i]; 2595 } 2596 2597 if (!rank) { 2598 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2599 2600 /* read in my part of the matrix numerical values */ 2601 nz = procsnz[0]; 2602 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2603 2604 /* insert into matrix */ 2605 jj = rstart; 2606 smycols = mycols; 2607 svals = vals; 2608 for (i=0; i<m; i++) { 2609 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2610 smycols += ourlens[i]; 2611 svals += ourlens[i]; 2612 jj++; 2613 } 2614 2615 /* read in other processors and ship out */ 2616 for 
(i=1; i<size; i++) { 2617 nz = procsnz[i]; 2618 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2619 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2620 } 2621 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2622 } else { 2623 /* receive numeric values */ 2624 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2625 2626 /* receive message of values*/ 2627 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2628 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2629 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2630 2631 /* insert into matrix */ 2632 jj = rstart; 2633 smycols = mycols; 2634 svals = vals; 2635 for (i=0; i<m; i++) { 2636 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2637 smycols += ourlens[i]; 2638 svals += ourlens[i]; 2639 jj++; 2640 } 2641 } 2642 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2643 ierr = PetscFree(vals);CHKERRQ(ierr); 2644 ierr = PetscFree(mycols);CHKERRQ(ierr); 2645 ierr = PetscFree(rowners);CHKERRQ(ierr); 2646 2647 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 *newmat = A; 2650 PetscFunctionReturn(0); 2651 } 2652 2653 #undef __FUNCT__ 2654 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2655 /* 2656 Not great since it makes two copies of the submatrix, first an SeqAIJ 2657 in local and then by concatenating the local matrices the end result. 2658 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2659 */ 2660 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2661 { 2662 PetscErrorCode ierr; 2663 PetscMPIInt rank,size; 2664 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2665 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2666 Mat *local,M,Mreuse; 2667 PetscScalar *vwork,*aa; 2668 MPI_Comm comm = mat->comm; 2669 Mat_SeqAIJ *aij; 2670 2671 2672 PetscFunctionBegin; 2673 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2674 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2675 2676 if (call == MAT_REUSE_MATRIX) { 2677 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2678 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2679 local = &Mreuse; 2680 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2681 } else { 2682 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2683 Mreuse = *local; 2684 ierr = PetscFree(local);CHKERRQ(ierr); 2685 } 2686 2687 /* 2688 m - number of local rows 2689 n - number of columns (same on all processors) 2690 rstart - first row in new global matrix generated 2691 */ 2692 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2693 if (call == MAT_INITIAL_MATRIX) { 2694 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2695 ii = aij->i; 2696 jj = aij->j; 2697 2698 /* 2699 Determine the number of non-zeros in the diagonal and off-diagonal 2700 portions of the matrix in order to do correct preallocation 2701 */ 2702 2703 /* first get start and end of "diagonal" columns */ 2704 if (csize == PETSC_DECIDE) { 2705 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2706 if (mglobal == n) { /* square matrix */ 2707 nlocal = m; 2708 } else { 2709 nlocal = n/size + ((n % size) > rank); 2710 } 2711 } else { 2712 nlocal = csize; 2713 } 2714 ierr = 
MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2715 rstart = rend - nlocal; 2716 if (rank == size - 1 && rend != n) { 2717 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2718 } 2719 2720 /* next, compute all the lengths */ 2721 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2722 olens = dlens + m; 2723 for (i=0; i<m; i++) { 2724 jend = ii[i+1] - ii[i]; 2725 olen = 0; 2726 dlen = 0; 2727 for (j=0; j<jend; j++) { 2728 if (*jj < rstart || *jj >= rend) olen++; 2729 else dlen++; 2730 jj++; 2731 } 2732 olens[i] = olen; 2733 dlens[i] = dlen; 2734 } 2735 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2736 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2737 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2738 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2739 ierr = PetscFree(dlens);CHKERRQ(ierr); 2740 } else { 2741 PetscInt ml,nl; 2742 2743 M = *newmat; 2744 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2745 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2746 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2747 /* 2748 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2749 rather than the slower MatSetValues(). 2750 */ 2751 M->was_assembled = PETSC_TRUE; 2752 M->assembled = PETSC_FALSE; 2753 } 2754 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2755 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2756 ii = aij->i; 2757 jj = aij->j; 2758 aa = aij->a; 2759 for (i=0; i<m; i++) { 2760 row = rstart + i; 2761 nz = ii[i+1] - ii[i]; 2762 cwork = jj; jj += nz; 2763 vwork = aa; aa += nz; 2764 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2765 } 2766 2767 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769 *newmat = M; 2770 2771 /* save submatrix used in processor for next request */ 2772 if (call == MAT_INITIAL_MATRIX) { 2773 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2774 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2775 } 2776 2777 PetscFunctionReturn(0); 2778 } 2779 2780 EXTERN_C_BEGIN 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2784 { 2785 PetscInt m,cstart, cend,j,nnz,i,d; 2786 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2787 const PetscInt *JJ; 2788 PetscScalar *values; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2793 2794 B->rmap.bs = B->cmap.bs = 1; 2795 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2796 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2797 m = B->rmap.n; 2798 cstart = B->cmap.rstart; 2799 cend = B->cmap.rend; 2800 rstart = B->rmap.rstart; 2801 2802 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2803 o_nnz = d_nnz + m; 2804 2805 for (i=0; i<m; i++) { 2806 nnz = Ii[i+1]- Ii[i]; 2807 JJ = J + Ii[i]; 2808 nnz_max = PetscMax(nnz_max,nnz); 2809 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2810 for (j=0; j<nnz; j++) { 2811 if (*JJ >= cstart) break; 2812 JJ++; 2813 } 2814 d = 0; 2815 for (; j<nnz; j++) { 2816 if (*JJ++ >= cend) break; 2817 d++; 2818 } 2819 
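 /* at this point d counts the column indices of local row i that fall in [cstart,cend), i.e. the
    diagonal-block entries; the remaining nnz-d entries of the row belong to the off-diagonal block */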
d_nnz[i] = d; 2820 o_nnz[i] = nnz - d; 2821 } 2822 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2823 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2824 2825 if (v) values = (PetscScalar*)v; 2826 else { 2827 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2828 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2829 } 2830 2831 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2832 for (i=0; i<m; i++) { 2833 ii = i + rstart; 2834 nnz = Ii[i+1]- Ii[i]; 2835 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2836 } 2837 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2838 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2840 2841 if (!v) { 2842 ierr = PetscFree(values);CHKERRQ(ierr); 2843 } 2844 PetscFunctionReturn(0); 2845 } 2846 EXTERN_C_END 2847 2848 #undef __FUNCT__ 2849 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2850 /*@ 2851 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2852 (the default parallel PETSc format). 2853 2854 Collective on MPI_Comm 2855 2856 Input Parameters: 2857 + B - the matrix 2858 . i - the indices into j for the start of each local row (starts with zero) 2859 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2860 - v - optional values in the matrix 2861 2862 Level: developer 2863 2864 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2865 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2866 2867 .keywords: matrix, aij, compressed row, sparse, parallel 2868 2869 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2870 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2871 @*/ 2872 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2873 { 2874 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2875 2876 PetscFunctionBegin; 2877 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2878 if (f) { 2879 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2880 } 2881 PetscFunctionReturn(0); 2882 } 2883 2884 #undef __FUNCT__ 2885 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2886 /*@C 2887 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2888 (the default parallel PETSc format). For good matrix assembly performance 2889 the user should preallocate the matrix storage by setting the parameters 2890 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2891 performance can be increased by more than a factor of 50. 2892 2893 Collective on MPI_Comm 2894 2895 Input Parameters: 2896 + A - the matrix 2897 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2898 (same value is used for all local rows) 2899 . d_nnz - array containing the number of nonzeros in the various rows of the 2900 DIAGONAL portion of the local submatrix (possibly different for each row) 2901 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2902 The size of this array is equal to the number of local rows, i.e 'm'. 2903 You must leave room for the diagonal entry even if it is zero. 2904 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2905 submatrix (same value is used for all local rows). 2906 - o_nnz - array containing the number of nonzeros in the various rows of the 2907 OFF-DIAGONAL portion of the local submatrix (possibly different for 2908 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2909 structure. The size of this array is equal to the number 2910 of local rows, i.e 'm'. 2911 2912 If the *_nnz parameter is given then the *_nz parameter is ignored 2913 2914 The AIJ format (also called the Yale sparse matrix format or 2915 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2916 storage. The stored row and column indices begin with zero. See the users manual for details. 2917 2918 The parallel matrix is partitioned such that the first m0 rows belong to 2919 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2920 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2921 2922 The DIAGONAL portion of the local submatrix of a processor can be defined 2923 as the submatrix which is obtained by extraction the part corresponding 2924 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2925 first row that belongs to the processor, and r2 is the last row belonging 2926 to the this processor. This is a square mxm matrix. The remaining portion 2927 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2928 2929 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2930 2931 Example usage: 2932 2933 Consider the following 8x8 matrix with 34 non-zero values, that is 2934 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2935 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2936 as follows: 2937 2938 .vb 2939 1 2 0 | 0 3 0 | 0 4 2940 Proc0 0 5 6 | 7 0 0 | 8 0 2941 9 0 10 | 11 0 0 | 12 0 2942 ------------------------------------- 2943 13 0 14 | 15 16 17 | 0 0 2944 Proc1 0 18 0 | 19 20 21 | 0 0 2945 0 0 0 | 22 23 0 | 24 0 2946 ------------------------------------- 2947 Proc2 25 26 27 | 0 0 28 | 29 0 2948 30 0 0 | 31 32 33 | 0 34 2949 .ve 2950 2951 This can be represented as a collection of submatrices as: 2952 2953 .vb 2954 A B C 2955 D E F 2956 G H I 2957 .ve 2958 2959 Where the submatrices A,B,C are owned by proc0, D,E,F are 2960 owned by proc1, G,H,I are owned by proc2. 2961 2962 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2963 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2964 The 'M','N' parameters are 8,8, and have the same values on all procs. 2965 2966 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2967 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2968 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2969 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2970 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2971 matrix, ans [DF] as another SeqAIJ matrix. 2972 2973 When d_nz, o_nz parameters are specified, d_nz storage elements are 2974 allocated for every row of the local diagonal submatrix, and o_nz 2975 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2976 One way to choose d_nz and o_nz is to use the max nonzerors per local 2977 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2978 In this case, the values of d_nz,o_nz are: 2979 .vb 2980 proc0 : dnz = 2, o_nz = 2 2981 proc1 : dnz = 3, o_nz = 2 2982 proc2 : dnz = 1, o_nz = 4 2983 .ve 2984 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2985 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2986 for proc3. i.e we are using 12+15+10=37 storage locations to store 2987 34 values. 2988 2989 When d_nnz, o_nnz parameters are specified, the storage is specified 2990 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2991 In the above case the values for d_nnz,o_nnz are: 2992 .vb 2993 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2994 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2995 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2996 .ve 2997 Here the space allocated is sum of all the above values i.e 34, and 2998 hence pre-allocation is perfect. 2999 3000 Level: intermediate 3001 3002 .keywords: matrix, aij, compressed row, sparse, parallel 3003 3004 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3005 MPIAIJ 3006 @*/ 3007 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3008 { 3009 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3010 3011 PetscFunctionBegin; 3012 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3013 if (f) { 3014 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3015 } 3016 PetscFunctionReturn(0); 3017 } 3018 3019 #undef __FUNCT__ 3020 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3021 /*@C 3022 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3023 CSR format the local rows. 3024 3025 Collective on MPI_Comm 3026 3027 Input Parameters: 3028 + comm - MPI communicator 3029 . m - number of local rows (Cannot be PETSC_DECIDE) 3030 . n - This value should be the same as the local size used in creating the 3031 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3032 calculated if N is given) For square matrices n is almost always m. 3033 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3034 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3035 . i - row indices 3036 . j - column indices 3037 - a - matrix values 3038 3039 Output Parameter: 3040 . mat - the matrix 3041 3042 Level: intermediate 3043 3044 Notes: 3045 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3046 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3047 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
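    A minimal per-process calling sketch showing how the CSR triplet maps onto the arguments
    (the sizes and array contents below are illustrative assumptions for the example, not
    values taken from this routine):
.vb
      PetscInt       i[] = {0,2,3};
      PetscInt       j[] = {0,5,1};
      PetscScalar    a[] = {1.0,2.0,3.0};
      Mat            A;
      PetscErrorCode ierr;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,6,i,j,a,&A);CHKERRQ(ierr);
.ve
    Here each process contributes two local rows, with the column indices sorted within each row.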
3048 3049 The i and j indices are 0 based 3050 3051 .keywords: matrix, aij, compressed row, sparse, parallel 3052 3053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3054 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3055 @*/ 3056 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3057 { 3058 PetscErrorCode ierr; 3059 3060 PetscFunctionBegin; 3061 if (i[0]) { 3062 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3063 } 3064 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3065 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3066 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3067 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatCreateMPIAIJ" 3073 /*@C 3074 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3075 (the default parallel PETSc format). For good matrix assembly performance 3076 the user should preallocate the matrix storage by setting the parameters 3077 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3078 performance can be increased by more than a factor of 50. 3079 3080 Collective on MPI_Comm 3081 3082 Input Parameters: 3083 + comm - MPI communicator 3084 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3085 This value should be the same as the local size used in creating the 3086 y vector for the matrix-vector product y = Ax. 3087 . n - This value should be the same as the local size used in creating the 3088 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3089 calculated if N is given) For square matrices n is almost always m. 3090 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3091 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3092 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3093 (same value is used for all local rows) 3094 . d_nnz - array containing the number of nonzeros in the various rows of the 3095 DIAGONAL portion of the local submatrix (possibly different for each row) 3096 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3097 The size of this array is equal to the number of local rows, i.e 'm'. 3098 You must leave room for the diagonal entry even if it is zero. 3099 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3100 submatrix (same value is used for all local rows). 3101 - o_nnz - array containing the number of nonzeros in the various rows of the 3102 OFF-DIAGONAL portion of the local submatrix (possibly different for 3103 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3104 structure. The size of this array is equal to the number 3105 of local rows, i.e 'm'. 3106 3107 Output Parameter: 3108 . A - the matrix 3109 3110 Notes: 3111 If the *_nnz parameter is given then the *_nz parameter is ignored 3112 3113 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3114 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3115 storage requirements for this matrix. 
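    A typical calling sequence, sketched with illustrative sizes (the global size 100 and the
    per-row nonzero estimates 5 and 2 are assumptions for the example only):
.vb
      Mat            A;
      PetscErrorCode ierr;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ...  fill the matrix with MatSetValues(), then assemble ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve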
3116 3117 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3118 processor then it must be used on all processors that share the object for 3119 that argument. 3120 3121 The user MUST specify either the local or global matrix dimensions 3122 (possibly both). 3123 3124 The parallel matrix is partitioned across processors such that the 3125 first m0 rows belong to process 0, the next m1 rows belong to 3126 process 1, the next m2 rows belong to process 2, etc., where 3127 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 3128 values corresponding to an [m x N] submatrix. 3129 3130 The columns are logically partitioned with the n0 columns belonging 3131 to the 0th partition, the next n1 columns belonging to the next 3132 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 3133 3134 The DIAGONAL portion of the local submatrix on any given processor 3135 is the submatrix corresponding to the rows and columns m,n 3136 owned by the given processor, i.e. the diagonal submatrix on 3137 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 3138 etc. The remaining portion of the local submatrix [m x (N-n)] 3139 constitutes the OFF-DIAGONAL portion. The example below 3140 illustrates this concept. 3141 3142 For a square global matrix we define each processor's diagonal portion 3143 to be its local rows and the corresponding columns (a square submatrix); 3144 each processor's off-diagonal portion encompasses the remainder of the 3145 local matrix (a rectangular submatrix). 3146 3147 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3148 3149 When calling this routine with a single process communicator, a matrix of 3150 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3151 type of communicator, use the construction mechanism: 3152 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3153 3154 By default, this format uses inodes (identical nodes) when possible. 3155 We search for consecutive rows with the same nonzero structure, thereby 3156 reusing matrix information to achieve increased efficiency. 3157 3158 Options Database Keys: 3159 + -mat_no_inode - Do not use inodes 3160 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3161 - -mat_aij_oneindex - Internally use indexing starting at 1 3162 rather than 0. Note that when calling MatSetValues(), 3163 the user still MUST index entries starting at 0! 3164 3165 3166 Example usage: 3167 3168 Consider the following 8x8 matrix with 34 non-zero values, that is 3169 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3170 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 3171 as follows: 3172 3173 .vb 3174 1 2 0 | 0 3 0 | 0 4 3175 Proc0 0 5 6 | 7 0 0 | 8 0 3176 9 0 10 | 11 0 0 | 12 0 3177 ------------------------------------- 3178 13 0 14 | 15 16 17 | 0 0 3179 Proc1 0 18 0 | 19 20 21 | 0 0 3180 0 0 0 | 22 23 0 | 24 0 3181 ------------------------------------- 3182 Proc2 25 26 27 | 0 0 28 | 29 0 3183 30 0 0 | 31 32 33 | 0 34 3184 .ve 3185 3186 This can be represented as a collection of submatrices as: 3187 3188 .vb 3189 A B C 3190 D E F 3191 G H I 3192 .ve 3193 3194 where the submatrices A,B,C are owned by proc0, D,E,F are 3195 owned by proc1, and G,H,I are owned by proc2. 3196 3197 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3198 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3199 The 'M','N' parameters are 8,8, and have the same values on all procs.
3200 3201 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3202 the submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3203 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3204 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3205 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3206 matrix, and [DF] as another SeqAIJ matrix. 3207 3208 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3209 allocated for every row of the local DIAGONAL submatrix, and o_nz 3210 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3211 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3212 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3213 In this case, the values of d_nz,o_nz are: 3214 .vb 3215 proc0 : d_nz = 2, o_nz = 2 3216 proc1 : d_nz = 3, o_nz = 2 3217 proc2 : d_nz = 1, o_nz = 4 3218 .ve 3219 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3220 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3221 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3222 34 values. 3223 3224 When the d_nnz, o_nnz parameters are specified, the storage is specified 3225 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3226 In the above case the values for d_nnz,o_nnz are: 3227 .vb 3228 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3229 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3230 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3231 .ve 3232 Here the space allocated is the sum of all the above values, i.e. 34, and 3233 hence the pre-allocation is perfect. 3234 3235 Level: intermediate 3236 3237 .keywords: matrix, aij, compressed row, sparse, parallel 3238 3239 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3240 MPIAIJ, MatCreateMPIAIJWithArrays() 3241 @*/ 3242 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3243 { 3244 PetscErrorCode ierr; 3245 PetscMPIInt size; 3246 3247 PetscFunctionBegin; 3248 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3249 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3251 if (size > 1) { 3252 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3253 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3254 } else { 3255 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3256 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3257 } 3258 PetscFunctionReturn(0); 3259 } 3260 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3263 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3264 { 3265 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3266 3267 PetscFunctionBegin; 3268 *Ad = a->A; 3269 *Ao = a->B; 3270 *colmap = a->garray; 3271 PetscFunctionReturn(0); 3272 } 3273 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3276 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3277 { 3278 PetscErrorCode ierr; 3279 PetscInt i; 3280 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3281 3282 PetscFunctionBegin; 3283 if (coloring->ctype == IS_COLORING_GLOBAL) { 3284 ISColoringValue *allcolors,*colors; 3285 ISColoring ocoloring; 3286 3287 /* set coloring for diagonal portion */ 3288 ierr =
MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3289 3290 /* set coloring for off-diagonal portion */ 3291 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3292 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3293 for (i=0; i<a->B->cmap.n; i++) { 3294 colors[i] = allcolors[a->garray[i]]; 3295 } 3296 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3297 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3298 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3299 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3300 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3301 ISColoringValue *colors; 3302 PetscInt *larray; 3303 ISColoring ocoloring; 3304 3305 /* set coloring for diagonal portion */ 3306 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3307 for (i=0; i<a->A->cmap.n; i++) { 3308 larray[i] = i + A->cmap.rstart; 3309 } 3310 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3311 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3312 for (i=0; i<a->A->cmap.n; i++) { 3313 colors[i] = coloring->colors[larray[i]]; 3314 } 3315 ierr = PetscFree(larray);CHKERRQ(ierr); 3316 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3317 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3318 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3319 3320 /* set coloring for off-diagonal portion */ 3321 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3322 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3323 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3324 for (i=0; i<a->B->cmap.n; i++) { 3325 colors[i] = coloring->colors[larray[i]]; 3326 } 3327 ierr = PetscFree(larray);CHKERRQ(ierr); 3328 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3329 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3330 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3331 } else { 3332 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3333 } 3334 3335 PetscFunctionReturn(0); 3336 } 3337 3338 #if defined(PETSC_HAVE_ADIC) 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3341 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3342 { 3343 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3344 PetscErrorCode ierr; 3345 3346 PetscFunctionBegin; 3347 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3348 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3349 PetscFunctionReturn(0); 3350 } 3351 #endif 3352 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3355 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3356 { 3357 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3362 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3363 PetscFunctionReturn(0); 3364 } 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "MatMerge" 3368 /*@C 3369 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3370 matrices from each processor 3371 3372 Collective on MPI_Comm 
3373 3374 Input Parameters: 3375 + comm - the communicators the parallel matrix will live on 3376 . inmat - the input sequential matrices 3377 . n - number of local columns (or PETSC_DECIDE) 3378 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3379 3380 Output Parameter: 3381 . outmat - the parallel matrix generated 3382 3383 Level: advanced 3384 3385 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3386 3387 @*/ 3388 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3389 { 3390 PetscErrorCode ierr; 3391 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3392 PetscInt *indx; 3393 PetscScalar *values; 3394 3395 PetscFunctionBegin; 3396 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3397 if (scall == MAT_INITIAL_MATRIX){ 3398 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3399 if (n == PETSC_DECIDE){ 3400 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3401 } 3402 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3403 rstart -= m; 3404 3405 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3406 for (i=0;i<m;i++) { 3407 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3408 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3409 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3410 } 3411 /* This routine will ONLY return MPIAIJ type matrix */ 3412 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3413 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3414 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3415 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3416 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3417 3418 } else if (scall == MAT_REUSE_MATRIX){ 3419 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3420 } else { 3421 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3422 } 3423 3424 for (i=0;i<m;i++) { 3425 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3426 Ii = i + rstart; 3427 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3428 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3429 } 3430 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3431 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3432 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3433 3434 PetscFunctionReturn(0); 3435 } 3436 3437 #undef __FUNCT__ 3438 #define __FUNCT__ "MatFileSplit" 3439 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3440 { 3441 PetscErrorCode ierr; 3442 PetscMPIInt rank; 3443 PetscInt m,N,i,rstart,nnz; 3444 size_t len; 3445 const PetscInt *indx; 3446 PetscViewer out; 3447 char *name; 3448 Mat B; 3449 const PetscScalar *values; 3450 3451 PetscFunctionBegin; 3452 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3453 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3454 /* Should this be the type of the diagonal block of A? 
*/ 3455 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3456 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3457 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3458 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3459 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3460 for (i=0;i<m;i++) { 3461 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3462 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3463 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3464 } 3465 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3466 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3467 3468 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3469 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3470 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3471 sprintf(name,"%s.%d",outfile,rank); 3472 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3473 ierr = PetscFree(name); 3474 ierr = MatView(B,out);CHKERRQ(ierr); 3475 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3476 ierr = MatDestroy(B);CHKERRQ(ierr); 3477 PetscFunctionReturn(0); 3478 } 3479 3480 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3481 #undef __FUNCT__ 3482 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3483 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3484 { 3485 PetscErrorCode ierr; 3486 Mat_Merge_SeqsToMPI *merge; 3487 PetscContainer container; 3488 3489 PetscFunctionBegin; 3490 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3491 if (container) { 3492 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3493 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3494 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3495 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3496 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3497 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3498 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3499 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3500 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3501 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3502 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3503 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3504 3505 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3506 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3507 } 3508 ierr = PetscFree(merge);CHKERRQ(ierr); 3509 3510 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3511 PetscFunctionReturn(0); 3512 } 3513 3514 #include "src/mat/utils/freespace.h" 3515 #include "petscbt.h" 3516 static PetscEvent logkey_seqstompinum = 0; 3517 #undef __FUNCT__ 3518 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3519 /*@C 3520 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3521 matrices from each processor 3522 3523 Collective on MPI_Comm 3524 3525 Input Parameters: 3526 + comm - the communicators the parallel matrix will live on 3527 . seqmat - the input sequential matrices 3528 . m - number of local rows (or PETSC_DECIDE) 3529 . n - number of local columns (or PETSC_DECIDE) 3530 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3531 3532 Output Parameter: 3533 . mpimat - the parallel matrix generated 3534 3535 Level: advanced 3536 3537 Notes: 3538 The dimensions of the sequential matrix in each processor MUST be the same. 3539 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3540 destroyed when mpimat is destroyed. 
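    For MAT_INITIAL_MATRIX the typical call sequence is as follows (a sketch of what the
    MatMerge_SeqsToMPI() driver itself does; declarations omitted):
.vb
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve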
Call PetscObjectQuery() to access seqmat. 3541 @*/ 3542 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3543 { 3544 PetscErrorCode ierr; 3545 MPI_Comm comm=mpimat->comm; 3546 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3547 PetscMPIInt size,rank,taga,*len_s; 3548 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3549 PetscInt proc,m; 3550 PetscInt **buf_ri,**buf_rj; 3551 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3552 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3553 MPI_Request *s_waits,*r_waits; 3554 MPI_Status *status; 3555 MatScalar *aa=a->a,**abuf_r,*ba_i; 3556 Mat_Merge_SeqsToMPI *merge; 3557 PetscContainer container; 3558 3559 PetscFunctionBegin; 3560 if (!logkey_seqstompinum) { 3561 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3562 } 3563 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3564 3565 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3566 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3567 3568 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3569 if (container) { 3570 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3571 } 3572 bi = merge->bi; 3573 bj = merge->bj; 3574 buf_ri = merge->buf_ri; 3575 buf_rj = merge->buf_rj; 3576 3577 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3578 owners = merge->rowmap.range; 3579 len_s = merge->len_s; 3580 3581 /* send and recv matrix values */ 3582 /*-----------------------------*/ 3583 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3584 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3585 3586 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3587 for (proc=0,k=0; proc<size; proc++){ 3588 if (!len_s[proc]) continue; 3589 i = owners[proc]; 3590 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3591 k++; 3592 } 3593 3594 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3595 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3596 ierr = PetscFree(status);CHKERRQ(ierr); 3597 3598 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3599 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3600 3601 /* insert mat values of mpimat */ 3602 /*----------------------------*/ 3603 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3604 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3605 nextrow = buf_ri_k + merge->nrecv; 3606 nextai = nextrow + merge->nrecv; 3607 3608 for (k=0; k<merge->nrecv; k++){ 3609 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3610 nrows = *(buf_ri_k[k]); 3611 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3612 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3613 } 3614 3615 /* set values of ba */ 3616 m = merge->rowmap.n; 3617 for (i=0; i<m; i++) { 3618 arow = owners[rank] + i; 3619 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3620 bnzi = bi[i+1] - bi[i]; 3621 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3622 3623 /* add local non-zero vals of this proc's seqmat into ba */ 3624 anzi = ai[arow+1] - ai[arow]; 3625 aj = a->j + ai[arow]; 3626 aa = a->a + ai[arow]; 3627 nextaj = 0; 3628 for (j=0; nextaj<anzi; j++){ 3629 if 
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3630 ba_i[j] += aa[nextaj++]; 3631 } 3632 } 3633 3634 /* add received vals into ba */ 3635 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3636 /* i-th row */ 3637 if (i == *nextrow[k]) { 3638 anzi = *(nextai[k]+1) - *nextai[k]; 3639 aj = buf_rj[k] + *(nextai[k]); 3640 aa = abuf_r[k] + *(nextai[k]); 3641 nextaj = 0; 3642 for (j=0; nextaj<anzi; j++){ 3643 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3644 ba_i[j] += aa[nextaj++]; 3645 } 3646 } 3647 nextrow[k]++; nextai[k]++; 3648 } 3649 } 3650 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3651 } 3652 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3653 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 3655 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3656 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3657 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3658 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3659 PetscFunctionReturn(0); 3660 } 3661 3662 static PetscEvent logkey_seqstompisym = 0; 3663 #undef __FUNCT__ 3664 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3665 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3666 { 3667 PetscErrorCode ierr; 3668 Mat B_mpi; 3669 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3670 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3671 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3672 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3673 PetscInt len,proc,*dnz,*onz; 3674 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3675 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3676 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3677 MPI_Status *status; 3678 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3679 PetscBT lnkbt; 3680 Mat_Merge_SeqsToMPI *merge; 3681 PetscContainer container; 3682 3683 PetscFunctionBegin; 3684 if (!logkey_seqstompisym) { 3685 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3686 } 3687 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3688 3689 /* make sure it is a PETSc comm */ 3690 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3691 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3692 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3693 3694 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3695 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3696 3697 /* determine row ownership */ 3698 /*---------------------------------------------------------*/ 3699 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3700 merge->rowmap.n = m; 3701 merge->rowmap.N = M; 3702 merge->rowmap.bs = 1; 3703 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3704 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3705 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3706 3707 m = merge->rowmap.n; 3708 M = merge->rowmap.N; 3709 owners = merge->rowmap.range; 3710 3711 /* determine the number of messages to send, their lengths */ 3712 /*---------------------------------------------------------*/ 3713 len_s = merge->len_s; 3714 3715 len = 0; /* length of buf_si[] */ 3716 merge->nsend = 0; 3717 for (proc=0; proc<size; proc++){ 3718 len_si[proc] = 0; 3719 if (proc == rank){ 3720 len_s[proc] = 0; 3721 } else { 3722 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3723 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3724 } 3725 if (len_s[proc]) { 3726 merge->nsend++; 3727 nrows = 0; 3728 for (i=owners[proc]; i<owners[proc+1]; i++){ 3729 if (ai[i+1] > ai[i]) nrows++; 3730 } 3731 len_si[proc] = 2*(nrows+1); 3732 len += len_si[proc]; 3733 } 3734 } 3735 3736 /* determine the number and length of messages to receive for ij-structure */ 3737 /*-------------------------------------------------------------------------*/ 3738 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3739 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3740 3741 /* post the Irecv of j-structure */ 3742 /*-------------------------------*/ 3743 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3744 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3745 3746 /* post the Isend of j-structure */ 3747 /*--------------------------------*/ 3748 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3749 sj_waits = si_waits + merge->nsend; 3750 3751 for (proc=0, k=0; proc<size; proc++){ 3752 if (!len_s[proc]) continue; 3753 i = owners[proc]; 3754 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3755 k++; 3756 } 3757 3758 /* receives and sends of j-structure are complete */ 3759 /*------------------------------------------------*/ 3760 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3761 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3762 3763 /* send and recv i-structure */ 3764 /*---------------------------*/ 3765 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3766 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3767 3768 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3769 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3770 for (proc=0,k=0; proc<size; proc++){ 3771 if (!len_s[proc]) continue; 3772 /* form outgoing message for i-structure: 3773 buf_si[0]: nrows to be sent 3774 [1:nrows]: row index (global) 3775 [nrows+1:2*nrows+1]: i-structure index 3776 */ 3777 /*-------------------------------------------*/ 3778 nrows = len_si[proc]/2 - 1; 3779 buf_si_i = buf_si + nrows+1; 3780 buf_si[0] = nrows; 3781 buf_si_i[0] = 0; 3782 nrows = 0; 3783 for (i=owners[proc]; i<owners[proc+1]; i++){ 3784 anzi = ai[i+1] - ai[i]; 3785 if (anzi) { 3786 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3787 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3788 nrows++; 3789 } 3790 } 3791 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3792 k++; 3793 buf_si += len_si[proc]; 3794 } 3795 3796 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3797 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3798 3799 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3800 for (i=0; i<merge->nrecv; i++){ 3801 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3802 } 3803 3804 ierr = PetscFree(len_si);CHKERRQ(ierr); 3805 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3806 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3807 ierr = 
PetscFree(si_waits);CHKERRQ(ierr); 3808 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3809 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3810 ierr = PetscFree(status);CHKERRQ(ierr); 3811 3812 /* compute a local seq matrix in each processor */ 3813 /*----------------------------------------------*/ 3814 /* allocate bi array and free space for accumulating nonzero column info */ 3815 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3816 bi[0] = 0; 3817 3818 /* create and initialize a linked list */ 3819 nlnk = N+1; 3820 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3821 3822 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3823 len = 0; 3824 len = ai[owners[rank+1]] - ai[owners[rank]]; 3825 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3826 current_space = free_space; 3827 3828 /* determine symbolic info for each local row */ 3829 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3830 nextrow = buf_ri_k + merge->nrecv; 3831 nextai = nextrow + merge->nrecv; 3832 for (k=0; k<merge->nrecv; k++){ 3833 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3834 nrows = *buf_ri_k[k]; 3835 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3836 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3837 } 3838 3839 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3840 len = 0; 3841 for (i=0;i<m;i++) { 3842 bnzi = 0; 3843 /* add local non-zero cols of this proc's seqmat into lnk */ 3844 arow = owners[rank] + i; 3845 anzi = ai[arow+1] - ai[arow]; 3846 aj = a->j + ai[arow]; 3847 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3848 bnzi += nlnk; 3849 /* add received col data into lnk */ 3850 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3851 if (i == *nextrow[k]) { /* i-th row */ 3852 anzi = *(nextai[k]+1) - *nextai[k]; 3853 aj = buf_rj[k] + *nextai[k]; 3854 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3855 bnzi += nlnk; 3856 nextrow[k]++; nextai[k]++; 3857 } 3858 } 3859 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3860 3861 /* if free space is not available, make more free space */ 3862 if (current_space->local_remaining<bnzi) { 3863 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3864 nspacedouble++; 3865 } 3866 /* copy data into free space, then initialize lnk */ 3867 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3868 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3869 3870 current_space->array += bnzi; 3871 current_space->local_used += bnzi; 3872 current_space->local_remaining -= bnzi; 3873 3874 bi[i+1] = bi[i] + bnzi; 3875 } 3876 3877 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3878 3879 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3880 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3881 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3882 3883 /* create symbolic parallel matrix B_mpi */ 3884 /*---------------------------------------*/ 3885 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3886 if (n==PETSC_DECIDE) { 3887 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3888 } else { 3889 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3890 } 3891 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3892 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3893 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3894 3895 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3896 B_mpi->assembled = PETSC_FALSE; 3897 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3898 merge->bi = bi; 3899 merge->bj = bj; 3900 merge->buf_ri = buf_ri; 3901 merge->buf_rj = buf_rj; 3902 merge->coi = PETSC_NULL; 3903 merge->coj = PETSC_NULL; 3904 merge->owners_co = PETSC_NULL; 3905 3906 /* attach the supporting struct to B_mpi for reuse */ 3907 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3908 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3909 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3910 *mpimat = B_mpi; 3911 3912 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3913 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3914 PetscFunctionReturn(0); 3915 } 3916 3917 static PetscEvent logkey_seqstompi = 0; 3918 #undef __FUNCT__ 3919 #define __FUNCT__ "MatMerge_SeqsToMPI" 3920 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3921 { 3922 PetscErrorCode ierr; 3923 3924 PetscFunctionBegin; 3925 if (!logkey_seqstompi) { 3926 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3927 } 3928 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3929 if (scall == MAT_INITIAL_MATRIX){ 3930 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3931 } 3932 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3933 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3934 PetscFunctionReturn(0); 3935 } 3936 static PetscEvent logkey_getlocalmat = 0; 3937 #undef __FUNCT__ 3938 #define __FUNCT__ "MatGetLocalMat" 3939 /*@C 3940 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3941 3942 Not Collective 3943 3944 Input Parameters: 3945 + A - the matrix 3946 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3947 3948 Output Parameter: 3949 . 
A_loc - the local sequential matrix generated 3950 3951 Level: developer 3952 3953 @*/ 3954 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3955 { 3956 PetscErrorCode ierr; 3957 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3958 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3959 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3960 PetscScalar *aa=a->a,*ba=b->a,*ca; 3961 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3962 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3963 3964 PetscFunctionBegin; 3965 if (!logkey_getlocalmat) { 3966 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3967 } 3968 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3969 if (scall == MAT_INITIAL_MATRIX){ 3970 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3971 ci[0] = 0; 3972 for (i=0; i<am; i++){ 3973 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3974 } 3975 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3976 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3977 k = 0; 3978 for (i=0; i<am; i++) { 3979 ncols_o = bi[i+1] - bi[i]; 3980 ncols_d = ai[i+1] - ai[i]; 3981 /* off-diagonal portion of A */ 3982 for (jo=0; jo<ncols_o; jo++) { 3983 col = cmap[*bj]; 3984 if (col >= cstart) break; 3985 cj[k] = col; bj++; 3986 ca[k++] = *ba++; 3987 } 3988 /* diagonal portion of A */ 3989 for (j=0; j<ncols_d; j++) { 3990 cj[k] = cstart + *aj++; 3991 ca[k++] = *aa++; 3992 } 3993 /* off-diagonal portion of A */ 3994 for (j=jo; j<ncols_o; j++) { 3995 cj[k] = cmap[*bj++]; 3996 ca[k++] = *ba++; 3997 } 3998 } 3999 /* put together the new matrix */ 4000 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4001 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4002 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4003 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4004 mat->free_a = PETSC_TRUE; 4005 mat->free_ij = PETSC_TRUE; 4006 mat->nonew = 0; 4007 } else if (scall == MAT_REUSE_MATRIX){ 4008 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4009 ci = mat->i; cj = mat->j; ca = mat->a; 4010 for (i=0; i<am; i++) { 4011 /* off-diagonal portion of A */ 4012 ncols_o = bi[i+1] - bi[i]; 4013 for (jo=0; jo<ncols_o; jo++) { 4014 col = cmap[*bj]; 4015 if (col >= cstart) break; 4016 *ca++ = *ba++; bj++; 4017 } 4018 /* diagonal portion of A */ 4019 ncols_d = ai[i+1] - ai[i]; 4020 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4021 /* off-diagonal portion of A */ 4022 for (j=jo; j<ncols_o; j++) { 4023 *ca++ = *ba++; bj++; 4024 } 4025 } 4026 } else { 4027 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4028 } 4029 4030 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4031 PetscFunctionReturn(0); 4032 } 4033 4034 static PetscEvent logkey_getlocalmatcondensed = 0; 4035 #undef __FUNCT__ 4036 #define __FUNCT__ "MatGetLocalMatCondensed" 4037 /*@C 4038 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4039 4040 Not Collective 4041 4042 Input Parameters: 4043 + A - the matrix 4044 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4045 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4046 4047 Output Parameter: 4048 . 
A_loc - the local sequential matrix generated 4049 4050 Level: developer 4051 4052 @*/ 4053 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4054 { 4055 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4056 PetscErrorCode ierr; 4057 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4058 IS isrowa,iscola; 4059 Mat *aloc; 4060 4061 PetscFunctionBegin; 4062 if (!logkey_getlocalmatcondensed) { 4063 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4064 } 4065 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4066 if (!row){ 4067 start = A->rmap.rstart; end = A->rmap.rend; 4068 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4069 } else { 4070 isrowa = *row; 4071 } 4072 if (!col){ 4073 start = A->cmap.rstart; 4074 cmap = a->garray; 4075 nzA = a->A->cmap.n; 4076 nzB = a->B->cmap.n; 4077 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4078 ncols = 0; 4079 for (i=0; i<nzB; i++) { 4080 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4081 else break; 4082 } 4083 imark = i; 4084 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4085 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4086 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4087 ierr = PetscFree(idx);CHKERRQ(ierr); 4088 } else { 4089 iscola = *col; 4090 } 4091 if (scall != MAT_INITIAL_MATRIX){ 4092 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4093 aloc[0] = *A_loc; 4094 } 4095 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4096 *A_loc = aloc[0]; 4097 ierr = PetscFree(aloc);CHKERRQ(ierr); 4098 if (!row){ 4099 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4100 } 4101 if (!col){ 4102 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4103 } 4104 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4105 PetscFunctionReturn(0); 4106 } 4107 4108 static PetscEvent logkey_GetBrowsOfAcols = 0; 4109 #undef __FUNCT__ 4110 #define __FUNCT__ "MatGetBrowsOfAcols" 4111 /*@C 4112 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4113 4114 Collective on Mat 4115 4116 Input Parameters: 4117 + A,B - the matrices in mpiaij format 4118 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4119 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4120 4121 Output Parameter: 4122 + rowb, colb - index sets of rows and columns of B to extract 4123 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4124 - B_seq - the sequential matrix generated 4125 4126 Level: developer 4127 4128 @*/ 4129 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4130 { 4131 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4132 PetscErrorCode ierr; 4133 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4134 IS isrowb,iscolb; 4135 Mat *bseq; 4136 4137 PetscFunctionBegin; 4138 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4139 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4140 } 4141 if (!logkey_GetBrowsOfAcols) { 4142 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4143 } 4144 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4145 4146 if (scall == MAT_INITIAL_MATRIX){ 4147 start = A->cmap.rstart; 4148 cmap = a->garray; 4149 nzA = a->A->cmap.n; 4150 nzB = a->B->cmap.n; 4151 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4152 ncols = 0; 4153 for (i=0; i<nzB; i++) { /* row < local row index */ 4154 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4155 else break; 4156 } 4157 imark = i; 4158 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4159 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4160 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4161 ierr = PetscFree(idx);CHKERRQ(ierr); 4162 *brstart = imark; 4163 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4164 } else { 4165 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4166 isrowb = *rowb; iscolb = *colb; 4167 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4168 bseq[0] = *B_seq; 4169 } 4170 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4171 *B_seq = bseq[0]; 4172 ierr = PetscFree(bseq);CHKERRQ(ierr); 4173 if (!rowb){ 4174 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4175 } else { 4176 *rowb = isrowb; 4177 } 4178 if (!colb){ 4179 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4180 } else { 4181 *colb = iscolb; 4182 } 4183 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4184 PetscFunctionReturn(0); 4185 } 4186 4187 static PetscEvent logkey_GetBrowsOfAocols = 0; 4188 #undef __FUNCT__ 4189 #define __FUNCT__ "MatGetBrowsOfAoCols" 4190 /*@C 4191 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4192 of the OFF-DIAGONAL portion of local A 4193 4194 Collective on Mat 4195 4196 Input Parameters: 4197 + A,B - the matrices in mpiaij format 4198 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4199 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4200 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4201 4202 Output Parameter: 4203 + B_oth - the sequential matrix generated 4204 4205 Level: developer 4206 4207 @*/ 4208 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4209 { 4210 VecScatter_MPI_General *gen_to,*gen_from; 4211 PetscErrorCode ierr; 4212 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4213 Mat_SeqAIJ *b_oth; 4214 VecScatter ctx=a->Mvctx; 4215 MPI_Comm comm=ctx->comm; 4216 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4217 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4218 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4219 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4220 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4221 MPI_Status *sstatus,rstatus; 4222 PetscInt *cols,sbs,rbs; 4223 PetscScalar *vals; 4224 4225 PetscFunctionBegin; 4226 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4227 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4228 } 4229 if (!logkey_GetBrowsOfAocols) { 4230 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4231 } 4232 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4233 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4234 4235 gen_to = (VecScatter_MPI_General*)ctx->todata; 4236 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4237 rvalues = gen_from->values; /* holds the length of receiving row */ 4238 svalues = gen_to->values; /* holds the length of sending row */ 4239 nrecvs = gen_from->n; 4240 nsends = gen_to->n; 4241 4242 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4243 srow = gen_to->indices; /* local row index to be sent */ 4244 sstarts = gen_to->starts; 4245 sprocs = gen_to->procs; 4246 sstatus = gen_to->sstatus; 4247 sbs = gen_to->bs; 4248 rstarts = gen_from->starts; 4249 rprocs = gen_from->procs; 4250 rbs = gen_from->bs; 4251 4252 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4253 if (scall == MAT_INITIAL_MATRIX){ 4254 /* i-array */ 4255 /*---------*/ 4256 /* post receives */ 4257 for (i=0; i<nrecvs; i++){ 4258 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4259 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4260 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4261 } 4262 4263 /* pack the outgoing message */ 4264 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4265 rstartsj = sstartsj + nsends +1; 4266 sstartsj[0] = 0; rstartsj[0] = 0; 4267 len = 0; /* total length of j or a array to be sent */ 4268 k = 0; 4269 for (i=0; i<nsends; i++){ 4270 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4271 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4272 for (j=0; j<nrows; j++) { 4273 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4274 for (l=0; l<sbs; l++){ 4275 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4276 rowlen[j*sbs+l] = ncols; 4277 len += ncols; 4278 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4279 } 4280 k++; 4281 } 4282 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4283 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4284 } 4285 /* recvs and sends of i-array are completed */ 4286 i = nrecvs; 4287 while (i--) { 4288 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4289 } 4290 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4291 4292 /* allocate buffers for sending j and a arrays */ 4293 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4294 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4295 4296 /* create i-array of B_oth */ 4297 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4298 b_othi[0] = 0; 4299 len = 0; /* total length of j or a array to be received */ 4300 k = 0; 4301 for (i=0; i<nrecvs; i++){ 4302 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4303 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4304 for (j=0; j<nrows; j++) { 4305 b_othi[k+1] = b_othi[k] + rowlen[j]; 4306 len += rowlen[j]; k++; 4307 } 4308 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4309 } 4310 4311 /* allocate space for j and a arrrays of B_oth */ 4312 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4313 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4314 4315 /* j-array */ 4316 /*---------*/ 4317 /* post receives of j-array */ 4318 for (i=0; i<nrecvs; i++){ 4319 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4320 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4321 } 4322 4323 /* pack the outgoing message j-array */ 4324 k = 0; 4325 for (i=0; i<nsends; i++){ 4326 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4327 bufJ = bufj+sstartsj[i]; 4328 for (j=0; j<nrows; j++) { 4329 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4330 for (ll=0; ll<sbs; ll++){ 4331 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4332 for (l=0; l<ncols; l++){ 4333 *bufJ++ = cols[l]; 4334 } 4335 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4336 } 4337 } 4338 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4339 } 4340 4341 /* recvs and sends of j-array are completed */ 4342 i = nrecvs; 4343 while (i--) { 4344 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4345 } 4346 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4347 } else if (scall == MAT_REUSE_MATRIX){ 4348 sstartsj = *startsj; 4349 rstartsj = sstartsj + nsends +1; 4350 bufa = *bufa_ptr; 4351 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4352 b_otha = b_oth->a; 4353 } else { 4354 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4355 } 4356 4357 /* a-array */ 4358 /*---------*/ 4359 /* post receives of a-array */ 4360 for (i=0; i<nrecvs; i++){ 4361 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4362 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4363 } 4364 4365 /* pack the outgoing message a-array */ 4366 k = 0; 4367 for (i=0; i<nsends; i++){ 4368 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4369 bufA = bufa+sstartsj[i]; 4370 for (j=0; j<nrows; j++) { 4371 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4372 for (ll=0; ll<sbs; ll++){ 4373 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4374 for (l=0; l<ncols; l++){ 4375 *bufA++ = vals[l]; 4376 } 4377 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4378 } 4379 } 4380 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4381 } 4382 /* recvs and sends of a-array are completed */ 4383 i = nrecvs; 4384 while (i--) { 4385 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4386 } 4387 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4388 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4389 4390 if (scall == MAT_INITIAL_MATRIX){ 4391 /* put together the new matrix */ 4392 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4393 4394 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4395 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4396 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4397 b_oth->free_a = PETSC_TRUE; 4398 b_oth->free_ij = PETSC_TRUE; 4399 b_oth->nonew = 0; 4400 4401 ierr = PetscFree(bufj);CHKERRQ(ierr); 4402 if (!startsj || !bufa_ptr){ 4403 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4404 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4405 } else { 4406 *startsj = sstartsj; 4407 *bufa_ptr = bufa; 4408 } 4409 } 4410 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4411 PetscFunctionReturn(0); 4412 } 4413 4414 #undef __FUNCT__ 4415 #define __FUNCT__ "MatGetCommunicationStructs" 4416 /*@C 4417 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4418 4419 Not Collective 4420 4421 Input Parameters: 4422 . A - The matrix in mpiaij format 4423 4424 Output Parameter: 4425 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4426 . colmap - A map from global column index to local index into lvec 4427 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4428 4429 Level: developer 4430 4431 @*/ 4432 #if defined (PETSC_USE_CTABLE) 4433 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4434 #else 4435 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4436 #endif 4437 { 4438 Mat_MPIAIJ *a; 4439 4440 PetscFunctionBegin; 4441 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4442 PetscValidPointer(lvec, 2) 4443 PetscValidPointer(colmap, 3) 4444 PetscValidPointer(multScatter, 4) 4445 a = (Mat_MPIAIJ *) A->data; 4446 if (lvec) *lvec = a->lvec; 4447 if (colmap) *colmap = a->colmap; 4448 if (multScatter) *multScatter = a->Mvctx; 4449 PetscFunctionReturn(0); 4450 } 4451 4452 EXTERN_C_BEGIN 4453 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4454 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4455 EXTERN_C_END 4456 4457 /*MC 4458 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4459 4460 Options Database Keys: 4461 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4462 4463 Level: beginner 4464 4465 .seealso: MatCreateMPIAIJ 4466 M*/ 4467 4468 EXTERN_C_BEGIN 4469 #undef __FUNCT__ 4470 #define __FUNCT__ "MatCreate_MPIAIJ" 4471 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4472 { 4473 Mat_MPIAIJ *b; 4474 PetscErrorCode ierr; 4475 PetscMPIInt size; 4476 4477 PetscFunctionBegin; 4478 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4479 4480 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4481 B->data = (void*)b; 4482 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4483 B->factor = 0; 4484 B->rmap.bs = 1; 4485 B->assembled = PETSC_FALSE; 4486 B->mapping = 0; 4487 4488 B->insertmode = NOT_SET_VALUES; 4489 b->size = size; 4490 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4491 4492 /* build cache for off array entries formed */ 4493 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4494 b->donotstash = PETSC_FALSE; 4495 b->colmap = 0; 4496 b->garray = 0; 4497 b->roworiented = PETSC_TRUE; 4498 4499 /* stuff used for matrix vector multiply */ 4500 b->lvec = PETSC_NULL; 4501 b->Mvctx = PETSC_NULL; 4502 4503 /* stuff for MatGetRow() */ 4504 b->rowindices = 0; 4505 b->rowvalues = 0; 4506 b->getrowactive = PETSC_FALSE; 4507 4508 4509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4510 "MatStoreValues_MPIAIJ", 4511 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4513 "MatRetrieveValues_MPIAIJ", 4514 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4516 "MatGetDiagonalBlock_MPIAIJ", 4517 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4518 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4519 "MatIsTranspose_MPIAIJ", 4520 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4521 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4522 "MatMPIAIJSetPreallocation_MPIAIJ", 4523 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4524 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4525 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4526 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4527 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4528 "MatDiagonalScaleLocal_MPIAIJ", 4529 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4530 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4531 "MatConvert_MPIAIJ_MPICSRPERM", 4532 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4534 "MatConvert_MPIAIJ_MPICRL", 4535 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4536 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4537 PetscFunctionReturn(0); 4538 } 4539 EXTERN_C_END 4540 4541 #undef __FUNCT__ 4542 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4543 /*@C 4544 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4545 and "off-diagonal" part of the matrix in CSR format. 4546 4547 Collective on MPI_Comm 4548 4549 Input Parameters: 4550 + comm - MPI communicator 4551 . m - number of local rows (Cannot be PETSC_DECIDE) 4552 . n - This value should be the same as the local size used in creating the 4553 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4554 calculated if N is given) For square matrices n is almost always m. 4555 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4556 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4557 . i - row indices for "diagonal" portion of matrix 4558 . j - column indices 4559 . a - matrix values 4560 . oi - row indices for "off-diagonal" portion of matrix 4561 . oj - column indices 4562 - oa - matrix values 4563 4564 Output Parameter: 4565 . mat - the matrix 4566 4567 Level: advanced 4568 4569 Notes: 4570 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4571 4572 The i and j indices are 0 based 4573 4574 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4575 4576 4577 .keywords: matrix, aij, compressed row, sparse, parallel 4578 4579 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4580 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4581 @*/ 4582 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4583 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4584 { 4585 PetscErrorCode ierr; 4586 Mat_MPIAIJ *maij; 4587 4588 PetscFunctionBegin; 4589 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4590 if (i[0]) { 4591 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4592 } 4593 if (oi[0]) { 4594 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4595 } 4596 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4597 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4598 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4599 maij = (Mat_MPIAIJ*) (*mat)->data; 4600 maij->donotstash = PETSC_TRUE; 4601 (*mat)->preallocated = PETSC_TRUE; 4602 4603 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4604 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4605 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4606 4607 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4608 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4609 4610 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4611 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4613 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4614 4615 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4616 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 PetscFunctionReturn(0); 4618 } 4619 4620 /* 4621 Special version for direct calls from Fortran 4622 */ 4623 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4624 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4625 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4626 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4627 #endif 4628 4629 /* Change these macros so can be used in void function */ 4630 #undef CHKERRQ 4631 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4632 #undef SETERRQ2 4633 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4634 #undef SETERRQ 4635 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4636 4637 EXTERN_C_BEGIN 4638 #undef __FUNCT__ 4639 #define __FUNCT__ "matsetvaluesmpiaij_" 4640 void PETSC_STDCALL 
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function: CHKERRQ() would "return ierr;",
   which is illegal here, so errors abort on the matrix's communicator instead */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt    i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt    cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by the MatSetValues_SeqAIJ_B_Private() macro;
                 DisAssemble_MPIAIJ() has replaced B's storage, so refresh ba before rebuilding ap2 */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END