1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij);CHKERRQ(ierr); 727 728 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 762 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 763 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 764 /* get largest number of rows any processor has */ 765 rlen = mat->rmap.n; 766 range = mat->rmap.range; 767 for (i=1; i<size; i++) { 768 rlen = PetscMax(rlen,range[i+1] - range[i]); 769 } 770 } else { 771 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 772 rlen = mat->rmap.n; 773 } 774 775 /* load up the local row counts */ 776 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 777 for (i=0; i<mat->rmap.n; i++) { 778 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 779 } 780 781 /* store the row lengths to the file */ 782 if (!rank) { 783 MPI_Status status; 784 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 785 for (i=1; i<size; i++) { 786 rlen = range[i+1] - range[i]; 787 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 788 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 789 } 790 } else { 791 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 792 } 793 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 794 795 /* load up the local column indices */ 796 nzmax = nz; /* )th processor needs space a largest processor needs */ 797 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 798 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 799 cnt = 0; 800 for (i=0; i<mat->rmap.n; i++) { 801 for (j=B->i[i]; j<B->i[i+1]; j++) { 802 if ( (col = garray[B->j[j]]) > cstart) break; 803 column_indices[cnt++] = col; 804 } 805 for (k=A->i[i]; k<A->i[i+1]; k++) { 806 column_indices[cnt++] = A->j[k] + cstart; 807 } 808 for (; j<B->i[i+1]; j++) { 809 column_indices[cnt++] = garray[B->j[j]]; 810 } 811 } 812 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 813 814 /* store the column indices to the file */ 815 if (!rank) { 816 MPI_Status status; 817 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 818 for (i=1; i<size; i++) { 819 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 820 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 821 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 823 } 824 } else { 825 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 826 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 } 828 ierr = PetscFree(column_indices);CHKERRQ(ierr); 829 830 /* load up the local column values */ 831 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 832 cnt = 0; 833 for (i=0; i<mat->rmap.n; i++) { 834 for (j=B->i[i]; j<B->i[i+1]; j++) { 835 if ( garray[B->j[j]] > cstart) break; 836 column_values[cnt++] = B->a[j]; 837 } 838 for (k=A->i[i]; k<A->i[i+1]; k++) { 839 column_values[cnt++] = A->a[k]; 840 } 841 for (; j<B->i[i+1]; j++) { 842 column_values[cnt++] = B->a[j]; 843 } 844 } 845 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 846 847 /* store the column values to the file */ 848 if (!rank) { 849 MPI_Status status; 850 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 851 for (i=1; i<size; i++) { 852 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 854 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 } else { 858 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(column_values);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 868 { 869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 870 PetscErrorCode ierr; 871 PetscMPIInt rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,iascii,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (iascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 PetscTruth inodes; 885 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 889 if (!inodes) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 891 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 894 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscInt inodecount,inodelimit,*inodes; 905 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 906 if (inodes) { 907 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 908 } else { 909 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 910 } 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 913 PetscFunctionReturn(0); 914 } 915 } else if (isbinary) { 916 if (size == 1) { 917 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 921 } 922 PetscFunctionReturn(0); 923 } else if (isdraw) { 924 PetscDraw draw; 925 PetscTruth isnull; 926 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 927 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 928 } 929 930 if (size == 1) { 931 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 932 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 933 } else { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqAIJ *Aloc; 937 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 938 PetscScalar *a; 939 940 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 941 if (!rank) { 942 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 943 } else { 944 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = mat->rmap.rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = mat->rmap.rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatRelax_MPIAIJ" 1014 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1015 { 1016 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1017 PetscErrorCode ierr; 1018 Vec bb1; 1019 1020 PetscFunctionBegin; 1021 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1022 1023 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1024 1025 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1026 if (flag & SOR_ZERO_INITIAL_GUESS) { 1027 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1028 its--; 1029 } 1030 1031 while (its--) { 1032 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1034 1035 /* update rhs: bb1 = bb - B*x */ 1036 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1037 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1038 1039 /* local sweep */ 1040 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1041 CHKERRQ(ierr); 1042 } 1043 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1044 if (flag & SOR_ZERO_INITIAL_GUESS) { 1045 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1046 its--; 1047 } 1048 while (its--) { 1049 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 1052 /* update rhs: bb1 = bb - B*x */ 1053 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1054 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1055 1056 /* local sweep */ 1057 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1058 CHKERRQ(ierr); 1059 } 1060 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1061 if (flag & SOR_ZERO_INITIAL_GUESS) { 1062 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1063 its--; 1064 } 1065 while (its--) { 1066 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1067 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 1069 /* update rhs: bb1 = bb - B*x */ 1070 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1071 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1072 1073 /* local sweep */ 1074 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1075 CHKERRQ(ierr); 1076 } 1077 } else { 1078 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1079 } 1080 1081 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatPermute_MPIAIJ" 1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1088 { 1089 MPI_Comm comm,pcomm; 1090 PetscInt first,local_size,nrows,*rows; 1091 int ntids; 1092 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1097 /* make a collective version of 'rowp' */ 1098 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1099 if (pcomm==comm) { 1100 crowp = rowp; 1101 } else { 1102 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1103 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1104 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1105 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1106 } 1107 /* collect the global row permutation and invert it */ 1108 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1109 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1110 if (pcomm!=comm) { 1111 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1112 } 1113 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1114 /* get the local target indices */ 1115 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1116 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1117 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1118 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1119 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1121 /* the column permutation is so much easier; 1122 make a local version of 'colp' and invert it */ 1123 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1124 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1125 if (ntids==1) { 1126 lcolp = colp; 1127 } else { 1128 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1129 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1131 } 1132 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1133 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1134 if (ntids>1) { 1135 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1136 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1137 } 1138 /* now we just get the submatrix */ 1139 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1140 /* clean up */ 1141 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1142 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1149 { 1150 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1151 Mat A = mat->A,B = mat->B; 1152 PetscErrorCode ierr; 1153 PetscReal isend[5],irecv[5]; 1154 1155 PetscFunctionBegin; 1156 info->block_size = 1.0; 1157 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1158 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1159 isend[3] = info->memory; isend[4] = info->mallocs; 1160 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1162 isend[3] += info->memory; isend[4] += info->mallocs; 1163 if (flag == MAT_LOCAL) { 1164 info->nz_used = isend[0]; 1165 info->nz_allocated = isend[1]; 1166 info->nz_unneeded = isend[2]; 1167 info->memory = isend[3]; 1168 info->mallocs = isend[4]; 1169 } else if (flag == MAT_GLOBAL_MAX) { 1170 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1171 info->nz_used = irecv[0]; 1172 info->nz_allocated = irecv[1]; 1173 info->nz_unneeded = irecv[2]; 1174 info->memory = irecv[3]; 1175 info->mallocs = irecv[4]; 1176 } else if (flag == MAT_GLOBAL_SUM) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } 1184 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1185 info->fill_ratio_needed = 0; 1186 info->factor_mallocs = 0; 1187 info->rows_global = (double)matin->rmap.N; 1188 info->columns_global = (double)matin->cmap.N; 1189 info->rows_local = (double)matin->rmap.n; 1190 info->columns_local = (double)matin->cmap.N; 1191 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatSetOption_MPIAIJ" 1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1198 { 1199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 switch (op) { 1204 case MAT_NO_NEW_NONZERO_LOCATIONS: 1205 case MAT_YES_NEW_NONZERO_LOCATIONS: 1206 case MAT_COLUMNS_UNSORTED: 1207 case MAT_COLUMNS_SORTED: 1208 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1209 case MAT_KEEP_ZEROED_ROWS: 1210 case MAT_NEW_NONZERO_LOCATION_ERR: 1211 case MAT_USE_INODES: 1212 case MAT_DO_NOT_USE_INODES: 1213 case MAT_IGNORE_ZERO_ENTRIES: 1214 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1215 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1216 break; 1217 case MAT_ROW_ORIENTED: 1218 a->roworiented = PETSC_TRUE; 1219 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1220 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1221 break; 1222 case MAT_ROWS_SORTED: 1223 case MAT_ROWS_UNSORTED: 1224 case MAT_YES_NEW_DIAGONALS: 1225 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1226 break; 1227 case MAT_COLUMN_ORIENTED: 1228 a->roworiented = PETSC_FALSE; 1229 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1230 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1231 break; 1232 case MAT_IGNORE_OFF_PROC_ENTRIES: 1233 a->donotstash = PETSC_TRUE; 1234 break; 1235 case MAT_NO_NEW_DIAGONALS: 1236 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1237 case MAT_SYMMETRIC: 1238 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1239 break; 1240 case MAT_STRUCTURALLY_SYMMETRIC: 1241 case MAT_HERMITIAN: 1242 case MAT_SYMMETRY_ETERNAL: 1243 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1244 break; 1245 case MAT_NOT_SYMMETRIC: 1246 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1247 case MAT_NOT_HERMITIAN: 1248 case MAT_NOT_SYMMETRY_ETERNAL: 1249 break; 1250 default: 1251 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1252 } 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatGetRow_MPIAIJ" 1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1259 { 1260 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1261 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1262 PetscErrorCode ierr; 1263 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1264 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1265 PetscInt *cmap,*idx_p; 1266 1267 PetscFunctionBegin; 1268 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1269 mat->getrowactive = PETSC_TRUE; 1270 1271 if (!mat->rowvalues && (idx || v)) { 1272 /* 1273 allocate enough space to hold information from the longest row. 1274 */ 1275 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1276 PetscInt max = 1,tmp; 1277 for (i=0; i<matin->rmap.n; i++) { 1278 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1279 if (max < tmp) { max = tmp; } 1280 } 1281 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1282 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1283 } 1284 1285 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1286 lrow = row - rstart; 1287 1288 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1289 if (!v) {pvA = 0; pvB = 0;} 1290 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1291 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1292 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1293 nztot = nzA + nzB; 1294 1295 cmap = mat->garray; 1296 if (v || idx) { 1297 if (nztot) { 1298 /* Sort by increasing column numbers, assuming A and B already sorted */ 1299 PetscInt imark = -1; 1300 if (v) { 1301 *v = v_p = mat->rowvalues; 1302 for (i=0; i<nzB; i++) { 1303 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1304 else break; 1305 } 1306 imark = i; 1307 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1308 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1309 } 1310 if (idx) { 1311 *idx = idx_p = mat->rowindices; 1312 if (imark > -1) { 1313 for (i=0; i<imark; i++) { 1314 idx_p[i] = cmap[cworkB[i]]; 1315 } 1316 } else { 1317 for (i=0; i<nzB; i++) { 1318 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1319 else break; 1320 } 1321 imark = i; 1322 } 1323 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1324 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1325 } 1326 } else { 1327 if (idx) *idx = 0; 1328 if (v) *v = 0; 1329 } 1330 } 1331 *nz = nztot; 1332 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1333 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1340 { 1341 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1342 1343 PetscFunctionBegin; 1344 if (!aij->getrowactive) { 1345 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1346 } 1347 aij->getrowactive = PETSC_FALSE; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatNorm_MPIAIJ" 1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1354 { 1355 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1356 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1357 PetscErrorCode ierr; 1358 PetscInt i,j,cstart = mat->cmap.rstart; 1359 PetscReal sum = 0.0; 1360 PetscScalar *v; 1361 1362 PetscFunctionBegin; 1363 if (aij->size == 1) { 1364 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1365 } else { 1366 if (type == NORM_FROBENIUS) { 1367 v = amat->a; 1368 for (i=0; i<amat->nz; i++) { 1369 #if defined(PETSC_USE_COMPLEX) 1370 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1371 #else 1372 sum += (*v)*(*v); v++; 1373 #endif 1374 } 1375 v = bmat->a; 1376 for (i=0; i<bmat->nz; i++) { 1377 #if defined(PETSC_USE_COMPLEX) 1378 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1379 #else 1380 sum += (*v)*(*v); v++; 1381 #endif 1382 } 1383 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1384 *norm = sqrt(*norm); 1385 } else if (type == NORM_1) { /* max column norm */ 1386 PetscReal *tmp,*tmp2; 1387 PetscInt *jj,*garray = aij->garray; 1388 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1389 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1390 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1391 *norm = 0.0; 1392 v = amat->a; jj = amat->j; 1393 for (j=0; j<amat->nz; j++) { 1394 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1395 } 1396 v = bmat->a; jj = bmat->j; 1397 for (j=0; j<bmat->nz; j++) { 1398 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1399 } 1400 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1401 for (j=0; j<mat->cmap.N; j++) { 1402 if (tmp2[j] > *norm) *norm = tmp2[j]; 1403 } 1404 ierr = PetscFree(tmp);CHKERRQ(ierr); 1405 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1406 } else if (type == NORM_INFINITY) { /* max row norm */ 1407 PetscReal ntemp = 0.0; 1408 for (j=0; j<aij->A->rmap.n; j++) { 1409 v = amat->a + amat->i[j]; 1410 sum = 0.0; 1411 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1412 sum += PetscAbsScalar(*v); v++; 1413 } 1414 v = bmat->a + bmat->i[j]; 1415 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1416 sum += PetscAbsScalar(*v); v++; 1417 } 1418 if (sum > ntemp) ntemp = sum; 1419 } 1420 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1421 } else { 1422 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1423 } 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatTranspose_MPIAIJ" 1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1431 { 1432 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1433 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1434 PetscErrorCode ierr; 1435 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1436 Mat B; 1437 PetscScalar *array; 1438 1439 PetscFunctionBegin; 1440 if (!matout && M != N) { 1441 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1442 } 1443 1444 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1445 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1446 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1447 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1448 1449 /* copy over the A part */ 1450 Aloc = (Mat_SeqAIJ*)a->A->data; 1451 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1452 row = A->rmap.rstart; 1453 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1454 for (i=0; i<m; i++) { 1455 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1456 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1457 } 1458 aj = Aloc->j; 1459 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1460 1461 /* copy over the B part */ 1462 Aloc = (Mat_SeqAIJ*)a->B->data; 1463 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1464 row = A->rmap.rstart; 1465 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1466 ct = cols; 1467 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1468 for (i=0; i<m; i++) { 1469 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1470 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1471 } 1472 ierr = PetscFree(ct);CHKERRQ(ierr); 1473 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1474 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1475 if (matout) { 1476 *matout = B; 1477 } else { 1478 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1485 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1486 { 1487 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1488 Mat a = aij->A,b = aij->B; 1489 PetscErrorCode ierr; 1490 PetscInt s1,s2,s3; 1491 1492 PetscFunctionBegin; 1493 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1494 if (rr) { 1495 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1496 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1497 /* Overlap communication with computation. */ 1498 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1499 } 1500 if (ll) { 1501 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1502 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1503 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1504 } 1505 /* scale the diagonal block */ 1506 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1507 1508 if (rr) { 1509 /* Do a scatter end and then right scale the off-diagonal block */ 1510 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1511 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1512 } 1513 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1519 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1520 { 1521 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1526 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1531 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1532 { 1533 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatEqual_MPIAIJ" 1543 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1544 { 1545 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1546 Mat a,b,c,d; 1547 PetscTruth flg; 1548 PetscErrorCode ierr; 1549 1550 PetscFunctionBegin; 1551 a = matA->A; b = matA->B; 1552 c = matB->A; d = matB->B; 1553 1554 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1555 if (flg) { 1556 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1557 } 1558 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatCopy_MPIAIJ" 1564 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1565 { 1566 PetscErrorCode ierr; 1567 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1568 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1569 1570 PetscFunctionBegin; 1571 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1572 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1573 /* because of the column compression in the off-processor part of the matrix a->B, 1574 the number of columns in a->B and b->B may be different, hence we cannot call 1575 the MatCopy() directly on the two parts. If need be, we can provide a more 1576 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1577 then copying the submatrices */ 1578 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1579 } else { 1580 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1581 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1588 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1589 { 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #include "petscblaslapack.h" 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatAXPY_MPIAIJ" 1600 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1601 { 1602 PetscErrorCode ierr; 1603 PetscInt i; 1604 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1605 PetscBLASInt bnz,one=1; 1606 Mat_SeqAIJ *x,*y; 1607 1608 PetscFunctionBegin; 1609 if (str == SAME_NONZERO_PATTERN) { 1610 PetscScalar alpha = a; 1611 x = (Mat_SeqAIJ *)xx->A->data; 1612 y = (Mat_SeqAIJ *)yy->A->data; 1613 bnz = (PetscBLASInt)x->nz; 1614 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1615 x = (Mat_SeqAIJ *)xx->B->data; 1616 y = (Mat_SeqAIJ *)yy->B->data; 1617 bnz = (PetscBLASInt)x->nz; 1618 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1619 } else if (str == SUBSET_NONZERO_PATTERN) { 1620 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1621 1622 x = (Mat_SeqAIJ *)xx->B->data; 1623 y = (Mat_SeqAIJ *)yy->B->data; 1624 if (y->xtoy && y->XtoY != xx->B) { 1625 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1626 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1627 } 1628 if (!y->xtoy) { /* get xtoy */ 1629 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1630 y->XtoY = xx->B; 1631 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1632 } 1633 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1634 } else { 1635 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1636 } 1637 PetscFunctionReturn(0); 1638 } 1639 1640 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1641 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatConjugate_MPIAIJ" 1644 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1645 { 1646 #if defined(PETSC_USE_COMPLEX) 1647 PetscErrorCode ierr; 1648 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1649 1650 PetscFunctionBegin; 1651 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1652 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1653 #else 1654 PetscFunctionBegin; 1655 #endif 1656 PetscFunctionReturn(0); 1657 } 1658 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatRealPart_MPIAIJ" 1661 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1662 { 1663 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1664 PetscErrorCode ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1668 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1674 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1675 { 1676 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1677 PetscErrorCode ierr; 1678 1679 PetscFunctionBegin; 1680 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1681 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #ifdef PETSC_HAVE_PBGL 1686 1687 #include <boost/parallel/mpi/bsp_process_group.hpp> 1688 #include <boost/graph/distributed/ilu_default_graph.hpp> 1689 #include <boost/graph/distributed/ilu_0_block.hpp> 1690 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1691 #include <boost/graph/distributed/petsc/interface.hpp> 1692 #include <boost/multi_array.hpp> 1693 #include <boost/parallel/distributed_property_map.hpp> 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1697 /* 1698 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1699 */ 1700 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1701 { 1702 namespace petsc = boost::distributed::petsc; 1703 1704 namespace graph_dist = boost::graph::distributed; 1705 using boost::graph::distributed::ilu_default::process_group_type; 1706 using boost::graph::ilu_permuted; 1707 1708 PetscTruth row_identity, col_identity; 1709 PetscContainer c; 1710 PetscInt m, n, M, N; 1711 PetscErrorCode ierr; 1712 1713 PetscFunctionBegin; 1714 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1715 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1716 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1717 if (!row_identity || !col_identity) { 1718 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1719 } 1720 1721 process_group_type pg; 1722 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1723 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1724 lgraph_type& level_graph = *lgraph_p; 1725 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1726 1727 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1728 ilu_permuted(level_graph); 1729 1730 /* put together the new matrix */ 1731 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1732 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1733 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1734 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1735 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1736 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1737 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1738 (*fact)->factor = FACTOR_LU; 1739 1740 ierr = PetscContainerCreate(A->comm, &c); 1741 ierr = PetscContainerSetPointer(c, lgraph_p); 1742 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1748 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1749 { 1750 PetscFunctionBegin; 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSolve_MPIAIJ" 1756 /* 1757 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1758 */ 1759 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1760 { 1761 namespace graph_dist = boost::graph::distributed; 1762 1763 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1764 lgraph_type* lgraph_p; 1765 PetscContainer c; 1766 PetscErrorCode ierr; 1767 1768 PetscFunctionBegin; 1769 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1770 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1771 ierr = VecCopy(b, x); CHKERRQ(ierr); 1772 1773 PetscScalar* array_x; 1774 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1775 PetscInt sx; 1776 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1777 1778 PetscScalar* array_b; 1779 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1780 PetscInt sb; 1781 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1782 1783 lgraph_type& level_graph = *lgraph_p; 1784 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1785 1786 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1787 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1788 ref_x(array_x, boost::extents[num_vertices(graph)]); 1789 1790 typedef boost::iterator_property_map<array_ref_type::iterator, 1791 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1792 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1793 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1794 1795 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1796 1797 PetscFunctionReturn(0); 1798 } 1799 #endif 1800 1801 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1802 PetscInt nzlocal,nsends,nrecvs; 1803 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1804 PetscScalar *sbuf_a,**rbuf_a; 1805 PetscErrorCode (*MatDestroy)(Mat); 1806 } Mat_Redundant; 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1810 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1811 { 1812 PetscErrorCode ierr; 1813 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1814 PetscInt i; 1815 1816 PetscFunctionBegin; 1817 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1818 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1819 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1820 for (i=0; i<redund->nrecvs; i++){ 1821 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1822 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1823 } 1824 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1825 ierr = PetscFree(redund);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatDestroy_MatRedundant" 1831 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1832 { 1833 PetscErrorCode ierr; 1834 PetscContainer container; 1835 Mat_Redundant *redund=PETSC_NULL; 1836 1837 PetscFunctionBegin; 1838 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1839 if (container) { 1840 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1841 } else { 1842 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1843 } 1844 A->ops->destroy = redund->MatDestroy; 1845 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1846 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1847 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1853 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1854 { 1855 PetscMPIInt rank,size; 1856 MPI_Comm comm=mat->comm; 1857 PetscErrorCode ierr; 1858 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1859 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1860 PetscInt *rowrange=mat->rmap.range; 1861 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1862 Mat A=aij->A,B=aij->B,C=*matredundant; 1863 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1864 PetscScalar *sbuf_a; 1865 PetscInt nzlocal=a->nz+b->nz; 1866 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1867 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1868 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1869 PetscScalar *vals,*aworkA,*aworkB; 1870 PetscMPIInt tag1,tag2,tag3,imdex; 1871 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1872 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1873 MPI_Status recv_status,*send_status; 1874 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1875 PetscInt **rbuf_j=PETSC_NULL; 1876 PetscScalar **rbuf_a=PETSC_NULL; 1877 Mat_Redundant *redund=PETSC_NULL; 1878 PetscContainer container; 1879 1880 PetscFunctionBegin; 1881 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1882 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1883 1884 if (reuse == MAT_REUSE_MATRIX) { 1885 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1886 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1887 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1888 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1889 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1890 if (container) { 1891 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1892 } else { 1893 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1894 } 1895 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 1896 1897 nsends = redund->nsends; 1898 nrecvs = redund->nrecvs; 1899 send_rank = redund->send_rank; recv_rank = send_rank + size; 1900 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1901 sbuf_j = redund->sbuf_j; 1902 sbuf_a = redund->sbuf_a; 1903 rbuf_j = redund->rbuf_j; 1904 rbuf_a = redund->rbuf_a; 1905 } 1906 1907 if (reuse == MAT_INITIAL_MATRIX){ 1908 PetscMPIInt subrank,subsize; 1909 PetscInt nleftover,np_subcomm; 1910 /* get the destination processors' id send_rank, nsends and nrecvs */ 1911 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1912 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1913 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1914 recv_rank = send_rank + size; 1915 np_subcomm = size/nsubcomm; 1916 nleftover = size - nsubcomm*np_subcomm; 1917 nsends = 0; nrecvs = 0; 1918 for (i=0; i<size; i++){ /* i=rank*/ 1919 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1920 send_rank[nsends] = i; nsends++; 1921 recv_rank[nrecvs++] = i; 1922 } 1923 } 1924 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1925 i = size-nleftover-1; 1926 j = 0; 1927 while (j < nsubcomm - nleftover){ 1928 send_rank[nsends++] = i; 1929 i--; j++; 1930 } 1931 } 1932 1933 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1934 for (i=0; i<nleftover; i++){ 1935 recv_rank[nrecvs++] = size-nleftover+i; 1936 } 1937 } 1938 1939 /* allocate sbuf_j, sbuf_a */ 1940 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1941 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1942 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1943 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1944 1945 /* copy mat's local entries into the buffers */ 1946 if (reuse == MAT_INITIAL_MATRIX){ 1947 rownz_max = 0; 1948 rptr = sbuf_j; 1949 cols = sbuf_j + rend-rstart + 1; 1950 vals = sbuf_a; 1951 rptr[0] = 0; 1952 for (i=0; i<rend-rstart; i++){ 1953 row = i + rstart; 1954 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1955 ncols = nzA + nzB; 1956 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1957 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1958 /* load the column indices for this row into cols */ 1959 lwrite = 0; 1960 for (l=0; l<nzB; l++) { 1961 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1962 vals[lwrite] = aworkB[l]; 1963 cols[lwrite++] = ctmp; 1964 } 1965 } 1966 for (l=0; l<nzA; l++){ 1967 vals[lwrite] = aworkA[l]; 1968 cols[lwrite++] = cstart + cworkA[l]; 1969 } 1970 for (l=0; l<nzB; l++) { 1971 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1972 vals[lwrite] = aworkB[l]; 1973 cols[lwrite++] = ctmp; 1974 } 1975 } 1976 vals += ncols; 1977 cols += ncols; 1978 rptr[i+1] = rptr[i] + ncols; 1979 if (rownz_max < ncols) rownz_max = ncols; 1980 } 1981 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1982 } else { /* only copy matrix values into sbuf_a */ 1983 rptr = sbuf_j; 1984 vals = sbuf_a; 1985 rptr[0] = 0; 1986 for (i=0; i<rend-rstart; i++){ 1987 row = i + rstart; 1988 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1989 ncols = nzA + nzB; 1990 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1991 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1992 lwrite = 0; 1993 for (l=0; l<nzB; l++) { 1994 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1995 } 1996 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1997 for (l=0; l<nzB; l++) { 1998 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1999 } 2000 vals += ncols; 2001 rptr[i+1] = rptr[i] + ncols; 2002 } 2003 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2004 2005 /* send nzlocal to others, and recv other's nzlocal */ 2006 /*--------------------------------------------------*/ 2007 if (reuse == MAT_INITIAL_MATRIX){ 2008 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2009 s_waits2 = s_waits3 + nsends; 2010 s_waits1 = s_waits2 + nsends; 2011 r_waits1 = s_waits1 + nsends; 2012 r_waits2 = r_waits1 + nrecvs; 2013 r_waits3 = r_waits2 + nrecvs; 2014 } else { 2015 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2016 r_waits3 = s_waits3 + nsends; 2017 } 2018 2019 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2020 if (reuse == MAT_INITIAL_MATRIX){ 2021 /* get new tags to keep the communication clean */ 2022 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2023 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2024 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2025 rbuf_nz = sbuf_nz + nsends; 2026 2027 /* post receives of other's nzlocal */ 2028 for (i=0; i<nrecvs; i++){ 2029 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2030 } 2031 /* send nzlocal to others */ 2032 for (i=0; i<nsends; i++){ 2033 sbuf_nz[i] = nzlocal; 2034 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2035 } 2036 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2037 count = nrecvs; 2038 while (count) { 2039 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2040 recv_rank[imdex] = recv_status.MPI_SOURCE; 2041 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2042 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2043 2044 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2045 rbuf_nz[imdex] += i + 2; 2046 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2047 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2048 count--; 2049 } 2050 /* wait on sends of nzlocal */ 2051 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2052 /* send mat->i,j to others, and recv from other's */ 2053 /*------------------------------------------------*/ 2054 for (i=0; i<nsends; i++){ 2055 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2056 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2057 } 2058 /* wait on receives of mat->i,j */ 2059 /*------------------------------*/ 2060 count = nrecvs; 2061 while (count) { 2062 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2063 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2064 count--; 2065 } 2066 /* wait on sends of mat->i,j */ 2067 /*---------------------------*/ 2068 if (nsends) { 2069 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2070 } 2071 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2072 2073 /* post receives, send and receive mat->a */ 2074 /*----------------------------------------*/ 2075 for (imdex=0; imdex<nrecvs; imdex++) { 2076 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2077 } 2078 for (i=0; i<nsends; i++){ 2079 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2080 } 2081 count = nrecvs; 2082 while (count) { 2083 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2084 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2085 count--; 2086 } 2087 if (nsends) { 2088 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2089 } 2090 2091 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2092 2093 /* create redundant matrix */ 2094 /*-------------------------*/ 2095 if (reuse == MAT_INITIAL_MATRIX){ 2096 /* compute rownz_max for preallocation */ 2097 for (imdex=0; imdex<nrecvs; imdex++){ 2098 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2099 rptr = rbuf_j[imdex]; 2100 for (i=0; i<j; i++){ 2101 ncols = rptr[i+1] - rptr[i]; 2102 if (rownz_max < ncols) rownz_max = ncols; 2103 } 2104 } 2105 2106 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2107 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2108 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2109 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2110 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2111 } else { 2112 C = *matredundant; 2113 } 2114 2115 /* insert local matrix entries */ 2116 rptr = sbuf_j; 2117 cols = sbuf_j + rend-rstart + 1; 2118 vals = sbuf_a; 2119 for (i=0; i<rend-rstart; i++){ 2120 row = i + rstart; 2121 ncols = rptr[i+1] - rptr[i]; 2122 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2123 vals += ncols; 2124 cols += ncols; 2125 } 2126 /* insert received matrix entries */ 2127 for (imdex=0; imdex<nrecvs; imdex++){ 2128 rstart = rowrange[recv_rank[imdex]]; 2129 rend = rowrange[recv_rank[imdex]+1]; 2130 rptr = rbuf_j[imdex]; 2131 cols = rbuf_j[imdex] + rend-rstart + 1; 2132 vals = rbuf_a[imdex]; 2133 for (i=0; i<rend-rstart; i++){ 2134 row = i + rstart; 2135 ncols = rptr[i+1] - rptr[i]; 2136 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2137 vals += ncols; 2138 cols += ncols; 2139 } 2140 } 2141 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2144 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2145 if (reuse == MAT_INITIAL_MATRIX){ 2146 PetscContainer container; 2147 *matredundant = C; 2148 /* create a supporting struct and attach it to C for reuse */ 2149 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 2150 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2151 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2152 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2153 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2154 2155 redund->nzlocal = nzlocal; 2156 redund->nsends = nsends; 2157 redund->nrecvs = nrecvs; 2158 redund->send_rank = send_rank; 2159 redund->sbuf_nz = sbuf_nz; 2160 redund->sbuf_j = sbuf_j; 2161 redund->sbuf_a = sbuf_a; 2162 redund->rbuf_j = rbuf_j; 2163 redund->rbuf_a = rbuf_a; 2164 2165 redund->MatDestroy = C->ops->destroy; 2166 C->ops->destroy = MatDestroy_MatRedundant; 2167 } 2168 PetscFunctionReturn(0); 2169 } 2170 2171 /* -------------------------------------------------------------------*/ 2172 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2173 MatGetRow_MPIAIJ, 2174 MatRestoreRow_MPIAIJ, 2175 MatMult_MPIAIJ, 2176 /* 4*/ MatMultAdd_MPIAIJ, 2177 MatMultTranspose_MPIAIJ, 2178 MatMultTransposeAdd_MPIAIJ, 2179 #ifdef PETSC_HAVE_PBGL 2180 MatSolve_MPIAIJ, 2181 #else 2182 0, 2183 #endif 2184 0, 2185 0, 2186 /*10*/ 0, 2187 0, 2188 0, 2189 MatRelax_MPIAIJ, 2190 MatTranspose_MPIAIJ, 2191 /*15*/ MatGetInfo_MPIAIJ, 2192 MatEqual_MPIAIJ, 2193 MatGetDiagonal_MPIAIJ, 2194 MatDiagonalScale_MPIAIJ, 2195 MatNorm_MPIAIJ, 2196 /*20*/ MatAssemblyBegin_MPIAIJ, 2197 MatAssemblyEnd_MPIAIJ, 2198 0, 2199 MatSetOption_MPIAIJ, 2200 MatZeroEntries_MPIAIJ, 2201 /*25*/ MatZeroRows_MPIAIJ, 2202 0, 2203 #ifdef PETSC_HAVE_PBGL 2204 MatLUFactorNumeric_MPIAIJ, 2205 #else 2206 0, 2207 #endif 2208 0, 2209 0, 2210 /*30*/ MatSetUpPreallocation_MPIAIJ, 2211 #ifdef PETSC_HAVE_PBGL 2212 MatILUFactorSymbolic_MPIAIJ, 2213 #else 2214 0, 2215 #endif 2216 0, 2217 0, 2218 0, 2219 /*35*/ MatDuplicate_MPIAIJ, 2220 0, 2221 0, 2222 0, 2223 0, 2224 /*40*/ MatAXPY_MPIAIJ, 2225 MatGetSubMatrices_MPIAIJ, 2226 MatIncreaseOverlap_MPIAIJ, 2227 MatGetValues_MPIAIJ, 2228 MatCopy_MPIAIJ, 2229 /*45*/ 0, 2230 MatScale_MPIAIJ, 2231 0, 2232 0, 2233 0, 2234 /*50*/ MatSetBlockSize_MPIAIJ, 2235 0, 2236 0, 2237 0, 2238 0, 2239 /*55*/ MatFDColoringCreate_MPIAIJ, 2240 0, 2241 MatSetUnfactored_MPIAIJ, 2242 MatPermute_MPIAIJ, 2243 0, 2244 /*60*/ MatGetSubMatrix_MPIAIJ, 2245 MatDestroy_MPIAIJ, 2246 MatView_MPIAIJ, 2247 0, 2248 0, 2249 /*65*/ 0, 2250 0, 2251 0, 2252 0, 2253 0, 2254 /*70*/ 0, 2255 0, 2256 MatSetColoring_MPIAIJ, 2257 #if defined(PETSC_HAVE_ADIC) 2258 MatSetValuesAdic_MPIAIJ, 2259 #else 2260 0, 2261 #endif 2262 MatSetValuesAdifor_MPIAIJ, 2263 /*75*/ 0, 2264 0, 2265 0, 2266 0, 2267 0, 2268 /*80*/ 0, 2269 0, 2270 0, 2271 0, 2272 /*84*/ MatLoad_MPIAIJ, 2273 0, 2274 0, 2275 0, 2276 0, 2277 0, 2278 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2279 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2280 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2281 MatPtAP_Basic, 2282 MatPtAPSymbolic_MPIAIJ, 2283 /*95*/ MatPtAPNumeric_MPIAIJ, 2284 0, 2285 0, 2286 0, 2287 0, 2288 /*100*/0, 2289 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2290 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2291 MatConjugate_MPIAIJ, 2292 0, 2293 /*105*/MatSetValuesRow_MPIAIJ, 2294 MatRealPart_MPIAIJ, 2295 MatImaginaryPart_MPIAIJ, 2296 0, 2297 0, 2298 /*110*/0, 2299 MatGetRedundantMatrix_MPIAIJ}; 2300 2301 /* ----------------------------------------------------------------------------------------*/ 2302 2303 EXTERN_C_BEGIN 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2306 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2307 { 2308 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2309 PetscErrorCode ierr; 2310 2311 PetscFunctionBegin; 2312 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2313 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 EXTERN_C_END 2317 2318 EXTERN_C_BEGIN 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2321 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2322 { 2323 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2324 PetscErrorCode ierr; 2325 2326 PetscFunctionBegin; 2327 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2328 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2329 PetscFunctionReturn(0); 2330 } 2331 EXTERN_C_END 2332 2333 #include "petscpc.h" 2334 EXTERN_C_BEGIN 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2337 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2338 { 2339 Mat_MPIAIJ *b; 2340 PetscErrorCode ierr; 2341 PetscInt i; 2342 2343 PetscFunctionBegin; 2344 B->preallocated = PETSC_TRUE; 2345 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2346 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2347 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2348 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2349 2350 B->rmap.bs = B->cmap.bs = 1; 2351 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2352 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2353 if (d_nnz) { 2354 for (i=0; i<B->rmap.n; i++) { 2355 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2356 } 2357 } 2358 if (o_nnz) { 2359 for (i=0; i<B->rmap.n; i++) { 2360 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2361 } 2362 } 2363 b = (Mat_MPIAIJ*)B->data; 2364 2365 /* Explicitly create 2 MATSEQAIJ matrices. */ 2366 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2367 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2368 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2369 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2370 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2371 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2372 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2373 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2374 2375 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2376 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2377 2378 PetscFunctionReturn(0); 2379 } 2380 EXTERN_C_END 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2384 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2385 { 2386 Mat mat; 2387 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 *newmat = 0; 2392 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2393 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2394 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2395 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2396 a = (Mat_MPIAIJ*)mat->data; 2397 2398 mat->factor = matin->factor; 2399 mat->rmap.bs = matin->rmap.bs; 2400 mat->assembled = PETSC_TRUE; 2401 mat->insertmode = NOT_SET_VALUES; 2402 mat->preallocated = PETSC_TRUE; 2403 2404 a->size = oldmat->size; 2405 a->rank = oldmat->rank; 2406 a->donotstash = oldmat->donotstash; 2407 a->roworiented = oldmat->roworiented; 2408 a->rowindices = 0; 2409 a->rowvalues = 0; 2410 a->getrowactive = PETSC_FALSE; 2411 2412 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2413 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2414 2415 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2416 if (oldmat->colmap) { 2417 #if defined (PETSC_USE_CTABLE) 2418 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2419 #else 2420 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2421 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2422 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2423 #endif 2424 } else a->colmap = 0; 2425 if (oldmat->garray) { 2426 PetscInt len; 2427 len = oldmat->B->cmap.n; 2428 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2429 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2430 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2431 } else a->garray = 0; 2432 2433 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2434 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2435 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2436 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2437 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2438 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2439 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2440 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2441 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2442 *newmat = mat; 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #include "petscsys.h" 2447 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatLoad_MPIAIJ" 2450 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2451 { 2452 Mat A; 2453 PetscScalar *vals,*svals; 2454 MPI_Comm comm = ((PetscObject)viewer)->comm; 2455 MPI_Status status; 2456 PetscErrorCode ierr; 2457 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2458 PetscInt i,nz,j,rstart,rend,mmax; 2459 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2460 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2461 PetscInt cend,cstart,n,*rowners; 2462 int fd; 2463 2464 PetscFunctionBegin; 2465 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2466 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2467 if (!rank) { 2468 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2469 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2470 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2471 } 2472 2473 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2474 M = header[1]; N = header[2]; 2475 /* determine ownership of all rows */ 2476 m = M/size + ((M % size) > rank); 2477 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2478 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2479 2480 /* First process needs enough room for process with most rows */ 2481 if (!rank) { 2482 mmax = rowners[1]; 2483 for (i=2; i<size; i++) { 2484 mmax = PetscMax(mmax,rowners[i]); 2485 } 2486 } else mmax = m; 2487 2488 rowners[0] = 0; 2489 for (i=2; i<=size; i++) { 2490 rowners[i] += rowners[i-1]; 2491 } 2492 rstart = rowners[rank]; 2493 rend = rowners[rank+1]; 2494 2495 /* distribute row lengths to all processors */ 2496 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2497 if (!rank) { 2498 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2499 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2500 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2501 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2502 for (j=0; j<m; j++) { 2503 procsnz[0] += ourlens[j]; 2504 } 2505 for (i=1; i<size; i++) { 2506 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2507 /* calculate the number of nonzeros on each processor */ 2508 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2509 procsnz[i] += rowlengths[j]; 2510 } 2511 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2512 } 2513 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2514 } else { 2515 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2516 } 2517 2518 if (!rank) { 2519 /* determine max buffer needed and allocate it */ 2520 maxnz = 0; 2521 for (i=0; i<size; i++) { 2522 maxnz = PetscMax(maxnz,procsnz[i]); 2523 } 2524 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2525 2526 /* read in my part of the matrix column indices */ 2527 nz = procsnz[0]; 2528 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2529 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2530 2531 /* read in every one elses and ship off */ 2532 for (i=1; i<size; i++) { 2533 nz = procsnz[i]; 2534 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2535 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2536 } 2537 ierr = PetscFree(cols);CHKERRQ(ierr); 2538 } else { 2539 /* determine buffer space needed for message */ 2540 nz = 0; 2541 for (i=0; i<m; i++) { 2542 nz += ourlens[i]; 2543 } 2544 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2545 2546 /* receive message of column indices*/ 2547 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2548 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2549 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2550 } 2551 2552 /* determine column ownership if matrix is not square */ 2553 if (N != M) { 2554 n = N/size + ((N % size) > rank); 2555 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2556 cstart = cend - n; 2557 } else { 2558 cstart = rstart; 2559 cend = rend; 2560 n = cend - cstart; 2561 } 2562 2563 /* loop over local rows, determining number of off diagonal entries */ 2564 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2565 jj = 0; 2566 for (i=0; i<m; i++) { 2567 for (j=0; j<ourlens[i]; j++) { 2568 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2569 jj++; 2570 } 2571 } 2572 2573 /* create our matrix */ 2574 for (i=0; i<m; i++) { 2575 ourlens[i] -= offlens[i]; 2576 } 2577 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2578 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2579 ierr = MatSetType(A,type);CHKERRQ(ierr); 2580 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2581 2582 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2583 for (i=0; i<m; i++) { 2584 ourlens[i] += offlens[i]; 2585 } 2586 2587 if (!rank) { 2588 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2589 2590 /* read in my part of the matrix numerical values */ 2591 nz = procsnz[0]; 2592 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2593 2594 /* insert into matrix */ 2595 jj = rstart; 2596 smycols = mycols; 2597 svals = vals; 2598 for (i=0; i<m; i++) { 2599 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2600 smycols += ourlens[i]; 2601 svals += ourlens[i]; 2602 jj++; 2603 } 2604 2605 /* read in other processors and ship out */ 2606 for (i=1; i<size; i++) { 2607 nz = procsnz[i]; 2608 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2609 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2610 } 2611 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2612 } else { 2613 /* receive numeric values */ 2614 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2615 2616 /* receive message of values*/ 2617 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2618 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2619 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2620 2621 /* insert into matrix */ 2622 jj = rstart; 2623 smycols = mycols; 2624 svals = vals; 2625 for (i=0; i<m; i++) { 2626 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2627 smycols += ourlens[i]; 2628 svals += ourlens[i]; 2629 jj++; 2630 } 2631 } 2632 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2633 ierr = PetscFree(vals);CHKERRQ(ierr); 2634 ierr = PetscFree(mycols);CHKERRQ(ierr); 2635 ierr = PetscFree(rowners);CHKERRQ(ierr); 2636 2637 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2638 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2639 *newmat = A; 2640 PetscFunctionReturn(0); 2641 } 2642 2643 #undef __FUNCT__ 2644 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2645 /* 2646 Not great since it makes two copies of the submatrix, first an SeqAIJ 2647 in local and then by concatenating the local matrices the end result. 2648 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2649 */ 2650 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2651 { 2652 PetscErrorCode ierr; 2653 PetscMPIInt rank,size; 2654 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2655 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2656 Mat *local,M,Mreuse; 2657 PetscScalar *vwork,*aa; 2658 MPI_Comm comm = mat->comm; 2659 Mat_SeqAIJ *aij; 2660 2661 2662 PetscFunctionBegin; 2663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2665 2666 if (call == MAT_REUSE_MATRIX) { 2667 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2668 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2669 local = &Mreuse; 2670 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2671 } else { 2672 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2673 Mreuse = *local; 2674 ierr = PetscFree(local);CHKERRQ(ierr); 2675 } 2676 2677 /* 2678 m - number of local rows 2679 n - number of columns (same on all processors) 2680 rstart - first row in new global matrix generated 2681 */ 2682 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2683 if (call == MAT_INITIAL_MATRIX) { 2684 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2685 ii = aij->i; 2686 jj = aij->j; 2687 2688 /* 2689 Determine the number of non-zeros in the diagonal and off-diagonal 2690 portions of the matrix in order to do correct preallocation 2691 */ 2692 2693 /* first get start and end of "diagonal" columns */ 2694 if (csize == PETSC_DECIDE) { 2695 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2696 if (mglobal == n) { /* square matrix */ 2697 nlocal = m; 2698 } else { 2699 nlocal = n/size + ((n % size) > rank); 2700 } 2701 } else { 2702 nlocal = csize; 2703 } 2704 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2705 rstart = rend - nlocal; 2706 if (rank == size - 1 && rend != n) { 2707 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2708 } 2709 2710 /* next, compute all the lengths */ 2711 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2712 olens = dlens + m; 2713 for (i=0; i<m; i++) { 2714 jend = ii[i+1] - ii[i]; 2715 olen = 0; 2716 dlen = 0; 2717 for (j=0; j<jend; j++) { 2718 if (*jj < rstart || *jj >= rend) olen++; 2719 else dlen++; 2720 jj++; 2721 } 2722 olens[i] = olen; 2723 dlens[i] = dlen; 2724 } 2725 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2726 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2727 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2728 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2729 ierr = PetscFree(dlens);CHKERRQ(ierr); 2730 } else { 2731 PetscInt ml,nl; 2732 2733 M = *newmat; 2734 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2735 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2736 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2737 /* 2738 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2739 rather than the slower MatSetValues(). 2740 */ 2741 M->was_assembled = PETSC_TRUE; 2742 M->assembled = PETSC_FALSE; 2743 } 2744 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2745 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2746 ii = aij->i; 2747 jj = aij->j; 2748 aa = aij->a; 2749 for (i=0; i<m; i++) { 2750 row = rstart + i; 2751 nz = ii[i+1] - ii[i]; 2752 cwork = jj; jj += nz; 2753 vwork = aa; aa += nz; 2754 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2755 } 2756 2757 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 *newmat = M; 2760 2761 /* save submatrix used in processor for next request */ 2762 if (call == MAT_INITIAL_MATRIX) { 2763 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2764 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2765 } 2766 2767 PetscFunctionReturn(0); 2768 } 2769 2770 EXTERN_C_BEGIN 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2773 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2774 { 2775 PetscInt m,cstart, cend,j,nnz,i,d; 2776 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2777 const PetscInt *JJ; 2778 PetscScalar *values; 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2783 2784 B->rmap.bs = B->cmap.bs = 1; 2785 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2786 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2787 m = B->rmap.n; 2788 cstart = B->cmap.rstart; 2789 cend = B->cmap.rend; 2790 rstart = B->rmap.rstart; 2791 2792 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2793 o_nnz = d_nnz + m; 2794 2795 for (i=0; i<m; i++) { 2796 nnz = Ii[i+1]- Ii[i]; 2797 JJ = J + Ii[i]; 2798 nnz_max = PetscMax(nnz_max,nnz); 2799 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2800 for (j=0; j<nnz; j++) { 2801 if (*JJ >= cstart) break; 2802 JJ++; 2803 } 2804 d = 0; 2805 for (; j<nnz; j++) { 2806 if (*JJ++ >= cend) break; 2807 d++; 2808 } 2809 d_nnz[i] = d; 2810 o_nnz[i] = nnz - d; 2811 } 2812 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2813 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2814 2815 if (v) values = (PetscScalar*)v; 2816 else { 2817 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2818 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2819 } 2820 2821 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2822 for (i=0; i<m; i++) { 2823 ii = i + rstart; 2824 nnz = Ii[i+1]- Ii[i]; 2825 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2826 } 2827 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2828 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2829 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2830 2831 if (!v) { 2832 ierr = PetscFree(values);CHKERRQ(ierr); 2833 } 2834 PetscFunctionReturn(0); 2835 } 2836 EXTERN_C_END 2837 2838 #undef __FUNCT__ 2839 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2840 /*@ 2841 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2842 (the default parallel PETSc format). 2843 2844 Collective on MPI_Comm 2845 2846 Input Parameters: 2847 + B - the matrix 2848 . i - the indices into j for the start of each local row (starts with zero) 2849 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2850 - v - optional values in the matrix 2851 2852 Level: developer 2853 2854 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2855 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2856 2857 .keywords: matrix, aij, compressed row, sparse, parallel 2858 2859 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2860 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2861 @*/ 2862 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2863 { 2864 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2865 2866 PetscFunctionBegin; 2867 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2868 if (f) { 2869 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2870 } 2871 PetscFunctionReturn(0); 2872 } 2873 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2876 /*@C 2877 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2878 (the default parallel PETSc format). For good matrix assembly performance 2879 the user should preallocate the matrix storage by setting the parameters 2880 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2881 performance can be increased by more than a factor of 50. 2882 2883 Collective on MPI_Comm 2884 2885 Input Parameters: 2886 + A - the matrix 2887 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2888 (same value is used for all local rows) 2889 . d_nnz - array containing the number of nonzeros in the various rows of the 2890 DIAGONAL portion of the local submatrix (possibly different for each row) 2891 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2892 The size of this array is equal to the number of local rows, i.e 'm'. 2893 You must leave room for the diagonal entry even if it is zero. 2894 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2895 submatrix (same value is used for all local rows). 2896 - o_nnz - array containing the number of nonzeros in the various rows of the 2897 OFF-DIAGONAL portion of the local submatrix (possibly different for 2898 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2899 structure. The size of this array is equal to the number 2900 of local rows, i.e 'm'. 2901 2902 If the *_nnz parameter is given then the *_nz parameter is ignored 2903 2904 The AIJ format (also called the Yale sparse matrix format or 2905 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2906 storage. The stored row and column indices begin with zero. See the users manual for details. 2907 2908 The parallel matrix is partitioned such that the first m0 rows belong to 2909 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2910 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2911 2912 The DIAGONAL portion of the local submatrix of a processor can be defined 2913 as the submatrix which is obtained by extraction the part corresponding 2914 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2915 first row that belongs to the processor, and r2 is the last row belonging 2916 to the this processor. This is a square mxm matrix. The remaining portion 2917 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2918 2919 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2920 2921 Example usage: 2922 2923 Consider the following 8x8 matrix with 34 non-zero values, that is 2924 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2925 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2926 as follows: 2927 2928 .vb 2929 1 2 0 | 0 3 0 | 0 4 2930 Proc0 0 5 6 | 7 0 0 | 8 0 2931 9 0 10 | 11 0 0 | 12 0 2932 ------------------------------------- 2933 13 0 14 | 15 16 17 | 0 0 2934 Proc1 0 18 0 | 19 20 21 | 0 0 2935 0 0 0 | 22 23 0 | 24 0 2936 ------------------------------------- 2937 Proc2 25 26 27 | 0 0 28 | 29 0 2938 30 0 0 | 31 32 33 | 0 34 2939 .ve 2940 2941 This can be represented as a collection of submatrices as: 2942 2943 .vb 2944 A B C 2945 D E F 2946 G H I 2947 .ve 2948 2949 Where the submatrices A,B,C are owned by proc0, D,E,F are 2950 owned by proc1, G,H,I are owned by proc2. 2951 2952 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2953 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2954 The 'M','N' parameters are 8,8, and have the same values on all procs. 2955 2956 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2957 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2958 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2959 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2960 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2961 matrix, ans [DF] as another SeqAIJ matrix. 2962 2963 When d_nz, o_nz parameters are specified, d_nz storage elements are 2964 allocated for every row of the local diagonal submatrix, and o_nz 2965 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2966 One way to choose d_nz and o_nz is to use the max nonzerors per local 2967 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2968 In this case, the values of d_nz,o_nz are: 2969 .vb 2970 proc0 : dnz = 2, o_nz = 2 2971 proc1 : dnz = 3, o_nz = 2 2972 proc2 : dnz = 1, o_nz = 4 2973 .ve 2974 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2975 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2976 for proc3. i.e we are using 12+15+10=37 storage locations to store 2977 34 values. 2978 2979 When d_nnz, o_nnz parameters are specified, the storage is specified 2980 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2981 In the above case the values for d_nnz,o_nnz are: 2982 .vb 2983 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2984 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2985 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2986 .ve 2987 Here the space allocated is sum of all the above values i.e 34, and 2988 hence pre-allocation is perfect. 2989 2990 Level: intermediate 2991 2992 .keywords: matrix, aij, compressed row, sparse, parallel 2993 2994 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2995 MPIAIJ 2996 @*/ 2997 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2998 { 2999 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3000 3001 PetscFunctionBegin; 3002 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3003 if (f) { 3004 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3005 } 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3011 /*@C 3012 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3013 CSR format the local rows. 3014 3015 Collective on MPI_Comm 3016 3017 Input Parameters: 3018 + comm - MPI communicator 3019 . m - number of local rows (Cannot be PETSC_DECIDE) 3020 . n - This value should be the same as the local size used in creating the 3021 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3022 calculated if N is given) For square matrices n is almost always m. 3023 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3024 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3025 . i - row indices 3026 . j - column indices 3027 - a - matrix values 3028 3029 Output Parameter: 3030 . mat - the matrix 3031 3032 Level: intermediate 3033 3034 Notes: 3035 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3036 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3037 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3038 3039 The i and j indices are 0 based 3040 3041 .keywords: matrix, aij, compressed row, sparse, parallel 3042 3043 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3044 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3045 @*/ 3046 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3047 { 3048 PetscErrorCode ierr; 3049 3050 PetscFunctionBegin; 3051 if (i[0]) { 3052 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3053 } 3054 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3055 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3056 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3057 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3058 PetscFunctionReturn(0); 3059 } 3060 3061 #undef __FUNCT__ 3062 #define __FUNCT__ "MatCreateMPIAIJ" 3063 /*@C 3064 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3065 (the default parallel PETSc format). For good matrix assembly performance 3066 the user should preallocate the matrix storage by setting the parameters 3067 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3068 performance can be increased by more than a factor of 50. 3069 3070 Collective on MPI_Comm 3071 3072 Input Parameters: 3073 + comm - MPI communicator 3074 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3075 This value should be the same as the local size used in creating the 3076 y vector for the matrix-vector product y = Ax. 3077 . n - This value should be the same as the local size used in creating the 3078 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3079 calculated if N is given) For square matrices n is almost always m. 3080 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3081 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3082 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3083 (same value is used for all local rows) 3084 . d_nnz - array containing the number of nonzeros in the various rows of the 3085 DIAGONAL portion of the local submatrix (possibly different for each row) 3086 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3087 The size of this array is equal to the number of local rows, i.e 'm'. 3088 You must leave room for the diagonal entry even if it is zero. 3089 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3090 submatrix (same value is used for all local rows). 3091 - o_nnz - array containing the number of nonzeros in the various rows of the 3092 OFF-DIAGONAL portion of the local submatrix (possibly different for 3093 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3094 structure. The size of this array is equal to the number 3095 of local rows, i.e 'm'. 3096 3097 Output Parameter: 3098 . A - the matrix 3099 3100 Notes: 3101 If the *_nnz parameter is given then the *_nz parameter is ignored 3102 3103 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3104 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3105 storage requirements for this matrix. 3106 3107 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3108 processor than it must be used on all processors that share the object for 3109 that argument. 3110 3111 The user MUST specify either the local or global matrix dimensions 3112 (possibly both). 3113 3114 The parallel matrix is partitioned such that the first m0 rows belong to 3115 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3116 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3117 3118 The DIAGONAL portion of the local submatrix of a processor can be defined 3119 as the submatrix which is obtained by extraction the part corresponding 3120 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3121 first row that belongs to the processor, and r2 is the last row belonging 3122 to the this processor. This is a square mxm matrix. The remaining portion 3123 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3124 3125 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3126 3127 When calling this routine with a single process communicator, a matrix of 3128 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3129 type of communicator, use the construction mechanism: 3130 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3131 3132 By default, this format uses inodes (identical nodes) when possible. 3133 We search for consecutive rows with the same nonzero structure, thereby 3134 reusing matrix information to achieve increased efficiency. 3135 3136 Options Database Keys: 3137 + -mat_no_inode - Do not use inodes 3138 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3139 - -mat_aij_oneindex - Internally use indexing starting at 1 3140 rather than 0. Note that when calling MatSetValues(), 3141 the user still MUST index entries starting at 0! 3142 3143 3144 Example usage: 3145 3146 Consider the following 8x8 matrix with 34 non-zero values, that is 3147 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3148 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3149 as follows: 3150 3151 .vb 3152 1 2 0 | 0 3 0 | 0 4 3153 Proc0 0 5 6 | 7 0 0 | 8 0 3154 9 0 10 | 11 0 0 | 12 0 3155 ------------------------------------- 3156 13 0 14 | 15 16 17 | 0 0 3157 Proc1 0 18 0 | 19 20 21 | 0 0 3158 0 0 0 | 22 23 0 | 24 0 3159 ------------------------------------- 3160 Proc2 25 26 27 | 0 0 28 | 29 0 3161 30 0 0 | 31 32 33 | 0 34 3162 .ve 3163 3164 This can be represented as a collection of submatrices as: 3165 3166 .vb 3167 A B C 3168 D E F 3169 G H I 3170 .ve 3171 3172 Where the submatrices A,B,C are owned by proc0, D,E,F are 3173 owned by proc1, G,H,I are owned by proc2. 3174 3175 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3176 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3177 The 'M','N' parameters are 8,8, and have the same values on all procs. 3178 3179 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3180 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3181 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3182 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3183 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3184 matrix, ans [DF] as another SeqAIJ matrix. 3185 3186 When d_nz, o_nz parameters are specified, d_nz storage elements are 3187 allocated for every row of the local diagonal submatrix, and o_nz 3188 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3189 One way to choose d_nz and o_nz is to use the max nonzerors per local 3190 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3191 In this case, the values of d_nz,o_nz are: 3192 .vb 3193 proc0 : dnz = 2, o_nz = 2 3194 proc1 : dnz = 3, o_nz = 2 3195 proc2 : dnz = 1, o_nz = 4 3196 .ve 3197 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3198 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3199 for proc3. i.e we are using 12+15+10=37 storage locations to store 3200 34 values. 3201 3202 When d_nnz, o_nnz parameters are specified, the storage is specified 3203 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3204 In the above case the values for d_nnz,o_nnz are: 3205 .vb 3206 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3207 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3208 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3209 .ve 3210 Here the space allocated is sum of all the above values i.e 34, and 3211 hence pre-allocation is perfect. 3212 3213 Level: intermediate 3214 3215 .keywords: matrix, aij, compressed row, sparse, parallel 3216 3217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3218 MPIAIJ, MatCreateMPIAIJWithArrays() 3219 @*/ 3220 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3221 { 3222 PetscErrorCode ierr; 3223 PetscMPIInt size; 3224 3225 PetscFunctionBegin; 3226 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3227 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3228 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3229 if (size > 1) { 3230 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3231 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3232 } else { 3233 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3234 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3235 } 3236 PetscFunctionReturn(0); 3237 } 3238 3239 #undef __FUNCT__ 3240 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3241 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3242 { 3243 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3244 3245 PetscFunctionBegin; 3246 *Ad = a->A; 3247 *Ao = a->B; 3248 *colmap = a->garray; 3249 PetscFunctionReturn(0); 3250 } 3251 3252 #undef __FUNCT__ 3253 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3254 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3255 { 3256 PetscErrorCode ierr; 3257 PetscInt i; 3258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3259 3260 PetscFunctionBegin; 3261 if (coloring->ctype == IS_COLORING_GLOBAL) { 3262 ISColoringValue *allcolors,*colors; 3263 ISColoring ocoloring; 3264 3265 /* set coloring for diagonal portion */ 3266 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3267 3268 /* set coloring for off-diagonal portion */ 3269 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3270 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3271 for (i=0; i<a->B->cmap.n; i++) { 3272 colors[i] = allcolors[a->garray[i]]; 3273 } 3274 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3275 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3276 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3277 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3278 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3279 ISColoringValue *colors; 3280 PetscInt *larray; 3281 ISColoring ocoloring; 3282 3283 /* set coloring for diagonal portion */ 3284 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3285 for (i=0; i<a->A->cmap.n; i++) { 3286 larray[i] = i + A->cmap.rstart; 3287 } 3288 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3289 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3290 for (i=0; i<a->A->cmap.n; i++) { 3291 colors[i] = coloring->colors[larray[i]]; 3292 } 3293 ierr = PetscFree(larray);CHKERRQ(ierr); 3294 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3295 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3296 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3297 3298 /* set coloring for off-diagonal portion */ 3299 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3300 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3301 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3302 for (i=0; i<a->B->cmap.n; i++) { 3303 colors[i] = coloring->colors[larray[i]]; 3304 } 3305 ierr = PetscFree(larray);CHKERRQ(ierr); 3306 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3307 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3308 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3309 } else { 3310 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3311 } 3312 3313 PetscFunctionReturn(0); 3314 } 3315 3316 #if defined(PETSC_HAVE_ADIC) 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3319 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3320 { 3321 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3322 PetscErrorCode ierr; 3323 3324 PetscFunctionBegin; 3325 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3326 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 #endif 3330 3331 #undef __FUNCT__ 3332 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3333 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3334 { 3335 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3336 PetscErrorCode ierr; 3337 3338 PetscFunctionBegin; 3339 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3340 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3341 PetscFunctionReturn(0); 3342 } 3343 3344 #undef __FUNCT__ 3345 #define __FUNCT__ "MatMerge" 3346 /*@C 3347 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3348 matrices from each processor 3349 3350 Collective on MPI_Comm 3351 3352 Input Parameters: 3353 + comm - the communicators the parallel matrix will live on 3354 . inmat - the input sequential matrices 3355 . n - number of local columns (or PETSC_DECIDE) 3356 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3357 3358 Output Parameter: 3359 . outmat - the parallel matrix generated 3360 3361 Level: advanced 3362 3363 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3364 3365 @*/ 3366 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3367 { 3368 PetscErrorCode ierr; 3369 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3370 PetscInt *indx; 3371 PetscScalar *values; 3372 3373 PetscFunctionBegin; 3374 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3375 if (scall == MAT_INITIAL_MATRIX){ 3376 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3377 if (n == PETSC_DECIDE){ 3378 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3379 } 3380 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3381 rstart -= m; 3382 3383 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3384 for (i=0;i<m;i++) { 3385 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3386 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3387 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3388 } 3389 /* This routine will ONLY return MPIAIJ type matrix */ 3390 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3391 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3392 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3393 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3394 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3395 3396 } else if (scall == MAT_REUSE_MATRIX){ 3397 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3398 } else { 3399 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3400 } 3401 3402 for (i=0;i<m;i++) { 3403 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3404 Ii = i + rstart; 3405 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3406 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3407 } 3408 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3409 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3410 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3411 3412 PetscFunctionReturn(0); 3413 } 3414 3415 #undef __FUNCT__ 3416 #define __FUNCT__ "MatFileSplit" 3417 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3418 { 3419 PetscErrorCode ierr; 3420 PetscMPIInt rank; 3421 PetscInt m,N,i,rstart,nnz; 3422 size_t len; 3423 const PetscInt *indx; 3424 PetscViewer out; 3425 char *name; 3426 Mat B; 3427 const PetscScalar *values; 3428 3429 PetscFunctionBegin; 3430 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3431 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3432 /* Should this be the type of the diagonal block of A? */ 3433 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3434 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3435 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3436 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3437 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3438 for (i=0;i<m;i++) { 3439 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3440 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3441 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3442 } 3443 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3444 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3445 3446 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3447 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3448 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3449 sprintf(name,"%s.%d",outfile,rank); 3450 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3451 ierr = PetscFree(name); 3452 ierr = MatView(B,out);CHKERRQ(ierr); 3453 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3454 ierr = MatDestroy(B);CHKERRQ(ierr); 3455 PetscFunctionReturn(0); 3456 } 3457 3458 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3459 #undef __FUNCT__ 3460 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3461 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3462 { 3463 PetscErrorCode ierr; 3464 Mat_Merge_SeqsToMPI *merge; 3465 PetscContainer container; 3466 3467 PetscFunctionBegin; 3468 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3469 if (container) { 3470 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3471 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3472 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3473 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3474 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3475 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3476 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3477 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3478 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3479 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3480 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3481 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3482 3483 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3484 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3485 } 3486 ierr = PetscFree(merge);CHKERRQ(ierr); 3487 3488 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3489 PetscFunctionReturn(0); 3490 } 3491 3492 #include "src/mat/utils/freespace.h" 3493 #include "petscbt.h" 3494 static PetscEvent logkey_seqstompinum = 0; 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3497 /*@C 3498 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3499 matrices from each processor 3500 3501 Collective on MPI_Comm 3502 3503 Input Parameters: 3504 + comm - the communicators the parallel matrix will live on 3505 . seqmat - the input sequential matrices 3506 . m - number of local rows (or PETSC_DECIDE) 3507 . n - number of local columns (or PETSC_DECIDE) 3508 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3509 3510 Output Parameter: 3511 . mpimat - the parallel matrix generated 3512 3513 Level: advanced 3514 3515 Notes: 3516 The dimensions of the sequential matrix in each processor MUST be the same. 3517 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3518 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3519 @*/ 3520 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3521 { 3522 PetscErrorCode ierr; 3523 MPI_Comm comm=mpimat->comm; 3524 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3525 PetscMPIInt size,rank,taga,*len_s; 3526 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3527 PetscInt proc,m; 3528 PetscInt **buf_ri,**buf_rj; 3529 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3530 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3531 MPI_Request *s_waits,*r_waits; 3532 MPI_Status *status; 3533 MatScalar *aa=a->a,**abuf_r,*ba_i; 3534 Mat_Merge_SeqsToMPI *merge; 3535 PetscContainer container; 3536 3537 PetscFunctionBegin; 3538 if (!logkey_seqstompinum) { 3539 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3540 } 3541 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3542 3543 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3544 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3545 3546 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3547 if (container) { 3548 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3549 } 3550 bi = merge->bi; 3551 bj = merge->bj; 3552 buf_ri = merge->buf_ri; 3553 buf_rj = merge->buf_rj; 3554 3555 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3556 owners = merge->rowmap.range; 3557 len_s = merge->len_s; 3558 3559 /* send and recv matrix values */ 3560 /*-----------------------------*/ 3561 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3562 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3563 3564 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3565 for (proc=0,k=0; proc<size; proc++){ 3566 if (!len_s[proc]) continue; 3567 i = owners[proc]; 3568 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3569 k++; 3570 } 3571 3572 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3573 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3574 ierr = PetscFree(status);CHKERRQ(ierr); 3575 3576 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3577 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3578 3579 /* insert mat values of mpimat */ 3580 /*----------------------------*/ 3581 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3582 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3583 nextrow = buf_ri_k + merge->nrecv; 3584 nextai = nextrow + merge->nrecv; 3585 3586 for (k=0; k<merge->nrecv; k++){ 3587 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3588 nrows = *(buf_ri_k[k]); 3589 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3590 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3591 } 3592 3593 /* set values of ba */ 3594 m = merge->rowmap.n; 3595 for (i=0; i<m; i++) { 3596 arow = owners[rank] + i; 3597 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3598 bnzi = bi[i+1] - bi[i]; 3599 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3600 3601 /* add local non-zero vals of this proc's seqmat into ba */ 3602 anzi = ai[arow+1] - ai[arow]; 3603 aj = a->j + ai[arow]; 3604 aa = a->a + ai[arow]; 3605 nextaj = 0; 3606 for (j=0; nextaj<anzi; j++){ 3607 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3608 ba_i[j] += aa[nextaj++]; 3609 } 3610 } 3611 3612 /* add received vals into ba */ 3613 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3614 /* i-th row */ 3615 if (i == *nextrow[k]) { 3616 anzi = *(nextai[k]+1) - *nextai[k]; 3617 aj = buf_rj[k] + *(nextai[k]); 3618 aa = abuf_r[k] + *(nextai[k]); 3619 nextaj = 0; 3620 for (j=0; nextaj<anzi; j++){ 3621 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3622 ba_i[j] += aa[nextaj++]; 3623 } 3624 } 3625 nextrow[k]++; nextai[k]++; 3626 } 3627 } 3628 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3629 } 3630 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 3633 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3634 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3635 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3636 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3637 PetscFunctionReturn(0); 3638 } 3639 3640 static PetscEvent logkey_seqstompisym = 0; 3641 #undef __FUNCT__ 3642 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3644 { 3645 PetscErrorCode ierr; 3646 Mat B_mpi; 3647 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3648 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3649 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3650 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3651 PetscInt len,proc,*dnz,*onz; 3652 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3653 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3654 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3655 MPI_Status *status; 3656 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3657 PetscBT lnkbt; 3658 Mat_Merge_SeqsToMPI *merge; 3659 PetscContainer container; 3660 3661 PetscFunctionBegin; 3662 if (!logkey_seqstompisym) { 3663 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3664 } 3665 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3666 3667 /* make sure it is a PETSc comm */ 3668 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3669 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3670 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3671 3672 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3673 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3674 3675 /* determine row ownership */ 3676 /*---------------------------------------------------------*/ 3677 merge->rowmap.n = m; 3678 merge->rowmap.N = M; 3679 merge->rowmap.bs = 1; 3680 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3681 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3682 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3683 3684 m = merge->rowmap.n; 3685 M = merge->rowmap.N; 3686 owners = merge->rowmap.range; 3687 3688 /* determine the number of messages to send, their lengths */ 3689 /*---------------------------------------------------------*/ 3690 len_s = merge->len_s; 3691 3692 len = 0; /* length of buf_si[] */ 3693 merge->nsend = 0; 3694 for (proc=0; proc<size; proc++){ 3695 len_si[proc] = 0; 3696 if (proc == rank){ 3697 len_s[proc] = 0; 3698 } else { 3699 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3700 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3701 } 3702 if (len_s[proc]) { 3703 merge->nsend++; 3704 nrows = 0; 3705 for (i=owners[proc]; i<owners[proc+1]; i++){ 3706 if (ai[i+1] > ai[i]) nrows++; 3707 } 3708 len_si[proc] = 2*(nrows+1); 3709 len += len_si[proc]; 3710 } 3711 } 3712 3713 /* determine the number and length of messages to receive for ij-structure */ 3714 /*-------------------------------------------------------------------------*/ 3715 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3716 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3717 3718 /* post the Irecv of j-structure */ 3719 /*-------------------------------*/ 3720 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3721 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3722 3723 /* post the Isend of j-structure */ 3724 /*--------------------------------*/ 3725 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3726 sj_waits = si_waits + merge->nsend; 3727 3728 for (proc=0, k=0; proc<size; proc++){ 3729 if (!len_s[proc]) continue; 3730 i = owners[proc]; 3731 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3732 k++; 3733 } 3734 3735 /* receives and sends of j-structure are complete */ 3736 /*------------------------------------------------*/ 3737 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3738 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3739 3740 /* send and recv i-structure */ 3741 /*---------------------------*/ 3742 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3743 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3744 3745 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3746 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3747 for (proc=0,k=0; proc<size; proc++){ 3748 if (!len_s[proc]) continue; 3749 /* form outgoing message for i-structure: 3750 buf_si[0]: nrows to be sent 3751 [1:nrows]: row index (global) 3752 [nrows+1:2*nrows+1]: i-structure index 3753 */ 3754 /*-------------------------------------------*/ 3755 nrows = len_si[proc]/2 - 1; 3756 buf_si_i = buf_si + nrows+1; 3757 buf_si[0] = nrows; 3758 buf_si_i[0] = 0; 3759 nrows = 0; 3760 for (i=owners[proc]; i<owners[proc+1]; i++){ 3761 anzi = ai[i+1] - ai[i]; 3762 if (anzi) { 3763 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3764 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3765 nrows++; 3766 } 3767 } 3768 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3769 k++; 3770 buf_si += len_si[proc]; 3771 } 3772 3773 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3774 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3775 3776 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3777 for (i=0; i<merge->nrecv; i++){ 3778 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3779 } 3780 3781 ierr = PetscFree(len_si);CHKERRQ(ierr); 3782 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3783 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3784 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3785 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3786 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3787 ierr = PetscFree(status);CHKERRQ(ierr); 3788 3789 /* compute a local seq matrix in each processor */ 3790 /*----------------------------------------------*/ 3791 /* allocate bi array and free space for accumulating nonzero column info */ 3792 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3793 bi[0] = 0; 3794 3795 /* create and initialize a linked list */ 3796 nlnk = N+1; 3797 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3798 3799 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3800 len = 0; 3801 len = ai[owners[rank+1]] - ai[owners[rank]]; 3802 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3803 current_space = free_space; 3804 3805 /* determine symbolic info for each local row */ 3806 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3807 nextrow = buf_ri_k + merge->nrecv; 3808 nextai = nextrow + merge->nrecv; 3809 for (k=0; k<merge->nrecv; k++){ 3810 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3811 nrows = *buf_ri_k[k]; 3812 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3813 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3814 } 3815 3816 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3817 len = 0; 3818 for (i=0;i<m;i++) { 3819 bnzi = 0; 3820 /* add local non-zero cols of this proc's seqmat into lnk */ 3821 arow = owners[rank] + i; 3822 anzi = ai[arow+1] - ai[arow]; 3823 aj = a->j + ai[arow]; 3824 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3825 bnzi += nlnk; 3826 /* add received col data into lnk */ 3827 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3828 if (i == *nextrow[k]) { /* i-th row */ 3829 anzi = *(nextai[k]+1) - *nextai[k]; 3830 aj = buf_rj[k] + *nextai[k]; 3831 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3832 bnzi += nlnk; 3833 nextrow[k]++; nextai[k]++; 3834 } 3835 } 3836 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3837 3838 /* if free space is not available, make more free space */ 3839 if (current_space->local_remaining<bnzi) { 3840 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3841 nspacedouble++; 3842 } 3843 /* copy data into free space, then initialize lnk */ 3844 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3845 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3846 3847 current_space->array += bnzi; 3848 current_space->local_used += bnzi; 3849 current_space->local_remaining -= bnzi; 3850 3851 bi[i+1] = bi[i] + bnzi; 3852 } 3853 3854 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3855 3856 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3857 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3858 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3859 3860 /* create symbolic parallel matrix B_mpi */ 3861 /*---------------------------------------*/ 3862 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3863 if (n==PETSC_DECIDE) { 3864 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3865 } else { 3866 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3867 } 3868 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3869 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3870 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3871 3872 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3873 B_mpi->assembled = PETSC_FALSE; 3874 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3875 merge->bi = bi; 3876 merge->bj = bj; 3877 merge->buf_ri = buf_ri; 3878 merge->buf_rj = buf_rj; 3879 merge->coi = PETSC_NULL; 3880 merge->coj = PETSC_NULL; 3881 merge->owners_co = PETSC_NULL; 3882 3883 /* attach the supporting struct to B_mpi for reuse */ 3884 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3885 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3886 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3887 *mpimat = B_mpi; 3888 3889 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3890 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3891 PetscFunctionReturn(0); 3892 } 3893 3894 static PetscEvent logkey_seqstompi = 0; 3895 #undef __FUNCT__ 3896 #define __FUNCT__ "MatMerge_SeqsToMPI" 3897 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3898 { 3899 PetscErrorCode ierr; 3900 3901 PetscFunctionBegin; 3902 if (!logkey_seqstompi) { 3903 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3904 } 3905 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3906 if (scall == MAT_INITIAL_MATRIX){ 3907 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3908 } 3909 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3910 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 static PetscEvent logkey_getlocalmat = 0; 3914 #undef __FUNCT__ 3915 #define __FUNCT__ "MatGetLocalMat" 3916 /*@C 3917 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3918 3919 Not Collective 3920 3921 Input Parameters: 3922 + A - the matrix 3923 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3924 3925 Output Parameter: 3926 . A_loc - the local sequential matrix generated 3927 3928 Level: developer 3929 3930 @*/ 3931 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3932 { 3933 PetscErrorCode ierr; 3934 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3935 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3936 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3937 PetscScalar *aa=a->a,*ba=b->a,*ca; 3938 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3939 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3940 3941 PetscFunctionBegin; 3942 if (!logkey_getlocalmat) { 3943 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3944 } 3945 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3946 if (scall == MAT_INITIAL_MATRIX){ 3947 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3948 ci[0] = 0; 3949 for (i=0; i<am; i++){ 3950 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3951 } 3952 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3953 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3954 k = 0; 3955 for (i=0; i<am; i++) { 3956 ncols_o = bi[i+1] - bi[i]; 3957 ncols_d = ai[i+1] - ai[i]; 3958 /* off-diagonal portion of A */ 3959 for (jo=0; jo<ncols_o; jo++) { 3960 col = cmap[*bj]; 3961 if (col >= cstart) break; 3962 cj[k] = col; bj++; 3963 ca[k++] = *ba++; 3964 } 3965 /* diagonal portion of A */ 3966 for (j=0; j<ncols_d; j++) { 3967 cj[k] = cstart + *aj++; 3968 ca[k++] = *aa++; 3969 } 3970 /* off-diagonal portion of A */ 3971 for (j=jo; j<ncols_o; j++) { 3972 cj[k] = cmap[*bj++]; 3973 ca[k++] = *ba++; 3974 } 3975 } 3976 /* put together the new matrix */ 3977 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3978 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3979 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3980 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3981 mat->free_a = PETSC_TRUE; 3982 mat->free_ij = PETSC_TRUE; 3983 mat->nonew = 0; 3984 } else if (scall == MAT_REUSE_MATRIX){ 3985 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3986 ci = mat->i; cj = mat->j; ca = mat->a; 3987 for (i=0; i<am; i++) { 3988 /* off-diagonal portion of A */ 3989 ncols_o = bi[i+1] - bi[i]; 3990 for (jo=0; jo<ncols_o; jo++) { 3991 col = cmap[*bj]; 3992 if (col >= cstart) break; 3993 *ca++ = *ba++; bj++; 3994 } 3995 /* diagonal portion of A */ 3996 ncols_d = ai[i+1] - ai[i]; 3997 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3998 /* off-diagonal portion of A */ 3999 for (j=jo; j<ncols_o; j++) { 4000 *ca++ = *ba++; bj++; 4001 } 4002 } 4003 } else { 4004 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4005 } 4006 4007 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4008 PetscFunctionReturn(0); 4009 } 4010 4011 static PetscEvent logkey_getlocalmatcondensed = 0; 4012 #undef __FUNCT__ 4013 #define __FUNCT__ "MatGetLocalMatCondensed" 4014 /*@C 4015 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4016 4017 Not Collective 4018 4019 Input Parameters: 4020 + A - the matrix 4021 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4022 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4023 4024 Output Parameter: 4025 . A_loc - the local sequential matrix generated 4026 4027 Level: developer 4028 4029 @*/ 4030 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4031 { 4032 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4033 PetscErrorCode ierr; 4034 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4035 IS isrowa,iscola; 4036 Mat *aloc; 4037 4038 PetscFunctionBegin; 4039 if (!logkey_getlocalmatcondensed) { 4040 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4041 } 4042 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4043 if (!row){ 4044 start = A->rmap.rstart; end = A->rmap.rend; 4045 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4046 } else { 4047 isrowa = *row; 4048 } 4049 if (!col){ 4050 start = A->cmap.rstart; 4051 cmap = a->garray; 4052 nzA = a->A->cmap.n; 4053 nzB = a->B->cmap.n; 4054 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4055 ncols = 0; 4056 for (i=0; i<nzB; i++) { 4057 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4058 else break; 4059 } 4060 imark = i; 4061 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4062 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4063 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4064 ierr = PetscFree(idx);CHKERRQ(ierr); 4065 } else { 4066 iscola = *col; 4067 } 4068 if (scall != MAT_INITIAL_MATRIX){ 4069 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4070 aloc[0] = *A_loc; 4071 } 4072 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4073 *A_loc = aloc[0]; 4074 ierr = PetscFree(aloc);CHKERRQ(ierr); 4075 if (!row){ 4076 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4077 } 4078 if (!col){ 4079 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4080 } 4081 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4082 PetscFunctionReturn(0); 4083 } 4084 4085 static PetscEvent logkey_GetBrowsOfAcols = 0; 4086 #undef __FUNCT__ 4087 #define __FUNCT__ "MatGetBrowsOfAcols" 4088 /*@C 4089 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4090 4091 Collective on Mat 4092 4093 Input Parameters: 4094 + A,B - the matrices in mpiaij format 4095 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4096 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4097 4098 Output Parameter: 4099 + rowb, colb - index sets of rows and columns of B to extract 4100 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4101 - B_seq - the sequential matrix generated 4102 4103 Level: developer 4104 4105 @*/ 4106 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4107 { 4108 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4109 PetscErrorCode ierr; 4110 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4111 IS isrowb,iscolb; 4112 Mat *bseq; 4113 4114 PetscFunctionBegin; 4115 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4116 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4117 } 4118 if (!logkey_GetBrowsOfAcols) { 4119 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4120 } 4121 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4122 4123 if (scall == MAT_INITIAL_MATRIX){ 4124 start = A->cmap.rstart; 4125 cmap = a->garray; 4126 nzA = a->A->cmap.n; 4127 nzB = a->B->cmap.n; 4128 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4129 ncols = 0; 4130 for (i=0; i<nzB; i++) { /* row < local row index */ 4131 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4132 else break; 4133 } 4134 imark = i; 4135 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4136 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4137 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4138 ierr = PetscFree(idx);CHKERRQ(ierr); 4139 *brstart = imark; 4140 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4141 } else { 4142 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4143 isrowb = *rowb; iscolb = *colb; 4144 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4145 bseq[0] = *B_seq; 4146 } 4147 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4148 *B_seq = bseq[0]; 4149 ierr = PetscFree(bseq);CHKERRQ(ierr); 4150 if (!rowb){ 4151 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4152 } else { 4153 *rowb = isrowb; 4154 } 4155 if (!colb){ 4156 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4157 } else { 4158 *colb = iscolb; 4159 } 4160 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4161 PetscFunctionReturn(0); 4162 } 4163 4164 static PetscEvent logkey_GetBrowsOfAocols = 0; 4165 #undef __FUNCT__ 4166 #define __FUNCT__ "MatGetBrowsOfAoCols" 4167 /*@C 4168 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4169 of the OFF-DIAGONAL portion of local A 4170 4171 Collective on Mat 4172 4173 Input Parameters: 4174 + A,B - the matrices in mpiaij format 4175 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4176 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4177 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4178 4179 Output Parameter: 4180 + B_oth - the sequential matrix generated 4181 4182 Level: developer 4183 4184 @*/ 4185 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4186 { 4187 VecScatter_MPI_General *gen_to,*gen_from; 4188 PetscErrorCode ierr; 4189 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4190 Mat_SeqAIJ *b_oth; 4191 VecScatter ctx=a->Mvctx; 4192 MPI_Comm comm=ctx->comm; 4193 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4194 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4195 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4196 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4197 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4198 MPI_Status *sstatus,rstatus; 4199 PetscInt *cols,sbs,rbs; 4200 PetscScalar *vals; 4201 4202 PetscFunctionBegin; 4203 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4204 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4205 } 4206 if (!logkey_GetBrowsOfAocols) { 4207 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4208 } 4209 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4210 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4211 4212 gen_to = (VecScatter_MPI_General*)ctx->todata; 4213 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4214 rvalues = gen_from->values; /* holds the length of receiving row */ 4215 svalues = gen_to->values; /* holds the length of sending row */ 4216 nrecvs = gen_from->n; 4217 nsends = gen_to->n; 4218 4219 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4220 srow = gen_to->indices; /* local row index to be sent */ 4221 sstarts = gen_to->starts; 4222 sprocs = gen_to->procs; 4223 sstatus = gen_to->sstatus; 4224 sbs = gen_to->bs; 4225 rstarts = gen_from->starts; 4226 rprocs = gen_from->procs; 4227 rbs = gen_from->bs; 4228 4229 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4230 if (scall == MAT_INITIAL_MATRIX){ 4231 /* i-array */ 4232 /*---------*/ 4233 /* post receives */ 4234 for (i=0; i<nrecvs; i++){ 4235 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4236 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4237 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4238 } 4239 4240 /* pack the outgoing message */ 4241 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4242 rstartsj = sstartsj + nsends +1; 4243 sstartsj[0] = 0; rstartsj[0] = 0; 4244 len = 0; /* total length of j or a array to be sent */ 4245 k = 0; 4246 for (i=0; i<nsends; i++){ 4247 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4248 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4249 for (j=0; j<nrows; j++) { 4250 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4251 for (l=0; l<sbs; l++){ 4252 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4253 rowlen[j*sbs+l] = ncols; 4254 len += ncols; 4255 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4256 } 4257 k++; 4258 } 4259 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4260 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4261 } 4262 /* recvs and sends of i-array are completed */ 4263 i = nrecvs; 4264 while (i--) { 4265 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4266 } 4267 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4268 4269 /* allocate buffers for sending j and a arrays */ 4270 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4271 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4272 4273 /* create i-array of B_oth */ 4274 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4275 b_othi[0] = 0; 4276 len = 0; /* total length of j or a array to be received */ 4277 k = 0; 4278 for (i=0; i<nrecvs; i++){ 4279 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4280 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4281 for (j=0; j<nrows; j++) { 4282 b_othi[k+1] = b_othi[k] + rowlen[j]; 4283 len += rowlen[j]; k++; 4284 } 4285 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4286 } 4287 4288 /* allocate space for j and a arrrays of B_oth */ 4289 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4290 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4291 4292 /* j-array */ 4293 /*---------*/ 4294 /* post receives of j-array */ 4295 for (i=0; i<nrecvs; i++){ 4296 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4297 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4298 } 4299 4300 /* pack the outgoing message j-array */ 4301 k = 0; 4302 for (i=0; i<nsends; i++){ 4303 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4304 bufJ = bufj+sstartsj[i]; 4305 for (j=0; j<nrows; j++) { 4306 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4307 for (ll=0; ll<sbs; ll++){ 4308 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4309 for (l=0; l<ncols; l++){ 4310 *bufJ++ = cols[l]; 4311 } 4312 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4313 } 4314 } 4315 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4316 } 4317 4318 /* recvs and sends of j-array are completed */ 4319 i = nrecvs; 4320 while (i--) { 4321 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4322 } 4323 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4324 } else if (scall == MAT_REUSE_MATRIX){ 4325 sstartsj = *startsj; 4326 rstartsj = sstartsj + nsends +1; 4327 bufa = *bufa_ptr; 4328 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4329 b_otha = b_oth->a; 4330 } else { 4331 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4332 } 4333 4334 /* a-array */ 4335 /*---------*/ 4336 /* post receives of a-array */ 4337 for (i=0; i<nrecvs; i++){ 4338 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4339 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4340 } 4341 4342 /* pack the outgoing message a-array */ 4343 k = 0; 4344 for (i=0; i<nsends; i++){ 4345 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4346 bufA = bufa+sstartsj[i]; 4347 for (j=0; j<nrows; j++) { 4348 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4349 for (ll=0; ll<sbs; ll++){ 4350 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4351 for (l=0; l<ncols; l++){ 4352 *bufA++ = vals[l]; 4353 } 4354 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4355 } 4356 } 4357 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4358 } 4359 /* recvs and sends of a-array are completed */ 4360 i = nrecvs; 4361 while (i--) { 4362 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4363 } 4364 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4365 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4366 4367 if (scall == MAT_INITIAL_MATRIX){ 4368 /* put together the new matrix */ 4369 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4370 4371 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4372 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4373 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4374 b_oth->free_a = PETSC_TRUE; 4375 b_oth->free_ij = PETSC_TRUE; 4376 b_oth->nonew = 0; 4377 4378 ierr = PetscFree(bufj);CHKERRQ(ierr); 4379 if (!startsj || !bufa_ptr){ 4380 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4381 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4382 } else { 4383 *startsj = sstartsj; 4384 *bufa_ptr = bufa; 4385 } 4386 } 4387 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4388 PetscFunctionReturn(0); 4389 } 4390 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "MatGetCommunicationStructs" 4393 /*@C 4394 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4395 4396 Not Collective 4397 4398 Input Parameters: 4399 . A - The matrix in mpiaij format 4400 4401 Output Parameter: 4402 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4403 . colmap - A map from global column index to local index into lvec 4404 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4405 4406 Level: developer 4407 4408 @*/ 4409 #if defined (PETSC_USE_CTABLE) 4410 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4411 #else 4412 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4413 #endif 4414 { 4415 Mat_MPIAIJ *a; 4416 4417 PetscFunctionBegin; 4418 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4419 PetscValidPointer(lvec, 2) 4420 PetscValidPointer(colmap, 3) 4421 PetscValidPointer(multScatter, 4) 4422 a = (Mat_MPIAIJ *) A->data; 4423 if (lvec) *lvec = a->lvec; 4424 if (colmap) *colmap = a->colmap; 4425 if (multScatter) *multScatter = a->Mvctx; 4426 PetscFunctionReturn(0); 4427 } 4428 4429 EXTERN_C_BEGIN 4430 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4431 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4432 EXTERN_C_END 4433 4434 /*MC 4435 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4436 4437 Options Database Keys: 4438 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4439 4440 Level: beginner 4441 4442 .seealso: MatCreateMPIAIJ 4443 M*/ 4444 4445 EXTERN_C_BEGIN 4446 #undef __FUNCT__ 4447 #define __FUNCT__ "MatCreate_MPIAIJ" 4448 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4449 { 4450 Mat_MPIAIJ *b; 4451 PetscErrorCode ierr; 4452 PetscMPIInt size; 4453 4454 PetscFunctionBegin; 4455 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4456 4457 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4458 B->data = (void*)b; 4459 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4460 B->factor = 0; 4461 B->rmap.bs = 1; 4462 B->assembled = PETSC_FALSE; 4463 B->mapping = 0; 4464 4465 B->insertmode = NOT_SET_VALUES; 4466 b->size = size; 4467 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4468 4469 /* build cache for off array entries formed */ 4470 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4471 b->donotstash = PETSC_FALSE; 4472 b->colmap = 0; 4473 b->garray = 0; 4474 b->roworiented = PETSC_TRUE; 4475 4476 /* stuff used for matrix vector multiply */ 4477 b->lvec = PETSC_NULL; 4478 b->Mvctx = PETSC_NULL; 4479 4480 /* stuff for MatGetRow() */ 4481 b->rowindices = 0; 4482 b->rowvalues = 0; 4483 b->getrowactive = PETSC_FALSE; 4484 4485 4486 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4487 "MatStoreValues_MPIAIJ", 4488 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4490 "MatRetrieveValues_MPIAIJ", 4491 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4493 "MatGetDiagonalBlock_MPIAIJ", 4494 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4496 "MatIsTranspose_MPIAIJ", 4497 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4499 "MatMPIAIJSetPreallocation_MPIAIJ", 4500 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4502 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4503 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4505 "MatDiagonalScaleLocal_MPIAIJ", 4506 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4508 "MatConvert_MPIAIJ_MPICSRPERM", 4509 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4511 "MatConvert_MPIAIJ_MPICRL", 4512 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4513 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4514 PetscFunctionReturn(0); 4515 } 4516 EXTERN_C_END 4517 4518 #undef __FUNCT__ 4519 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4520 /*@C 4521 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4522 and "off-diagonal" part of the matrix in CSR format. 4523 4524 Collective on MPI_Comm 4525 4526 Input Parameters: 4527 + comm - MPI communicator 4528 . m - number of local rows (Cannot be PETSC_DECIDE) 4529 . n - This value should be the same as the local size used in creating the 4530 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4531 calculated if N is given) For square matrices n is almost always m. 4532 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4533 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4534 . i - row indices for "diagonal" portion of matrix 4535 . j - column indices 4536 . a - matrix values 4537 . oi - row indices for "off-diagonal" portion of matrix 4538 . oj - column indices 4539 - oa - matrix values 4540 4541 Output Parameter: 4542 . mat - the matrix 4543 4544 Level: advanced 4545 4546 Notes: 4547 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4548 4549 The i and j indices are 0 based 4550 4551 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4552 4553 4554 .keywords: matrix, aij, compressed row, sparse, parallel 4555 4556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4557 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4558 @*/ 4559 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4560 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4561 { 4562 PetscErrorCode ierr; 4563 Mat_MPIAIJ *maij; 4564 4565 PetscFunctionBegin; 4566 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4567 if (i[0]) { 4568 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4569 } 4570 if (oi[0]) { 4571 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4572 } 4573 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4574 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4575 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4576 maij = (Mat_MPIAIJ*) (*mat)->data; 4577 maij->donotstash = PETSC_TRUE; 4578 (*mat)->preallocated = PETSC_TRUE; 4579 4580 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4581 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr); 4582 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr); 4583 4584 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4585 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4586 4587 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4588 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4589 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4590 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4591 4592 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4593 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4594 PetscFunctionReturn(0); 4595 } 4596 4597 /* 4598 Special version for direct calls from Fortran 4599 */ 4600 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4601 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4602 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4603 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4604 #endif 4605 4606 /* Change these macros so can be used in void function */ 4607 #undef CHKERRQ 4608 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4609 #undef SETERRQ2 4610 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4611 #undef SETERRQ 4612 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4613 4614 EXTERN_C_BEGIN 4615 #undef __FUNCT__ 4616 #define __FUNCT__ "matsetvaluesmpiaij_" 4617 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4618 { 4619 Mat mat = *mmat; 4620 PetscInt m = *mm, n = *mn; 4621 InsertMode addv = *maddv; 4622 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4623 PetscScalar value; 4624 PetscErrorCode ierr; 4625 4626 MatPreallocated(mat); 4627 if (mat->insertmode == NOT_SET_VALUES) { 4628 mat->insertmode = addv; 4629 } 4630 #if defined(PETSC_USE_DEBUG) 4631 else if (mat->insertmode != addv) { 4632 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4633 } 4634 #endif 4635 { 4636 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4637 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4638 PetscTruth roworiented = aij->roworiented; 4639 4640 /* Some Variables required in the macro */ 4641 Mat A = aij->A; 4642 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4643 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4644 PetscScalar *aa = a->a; 4645 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4646 Mat B = aij->B; 4647 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4648 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4649 PetscScalar *ba = b->a; 4650 4651 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4652 PetscInt nonew = a->nonew; 4653 PetscScalar *ap1,*ap2; 4654 4655 PetscFunctionBegin; 4656 for (i=0; i<m; i++) { 4657 if (im[i] < 0) continue; 4658 #if defined(PETSC_USE_DEBUG) 4659 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4660 #endif 4661 if (im[i] >= rstart && im[i] < rend) { 4662 row = im[i] - rstart; 4663 lastcol1 = -1; 4664 rp1 = aj + ai[row]; 4665 ap1 = aa + ai[row]; 4666 rmax1 = aimax[row]; 4667 nrow1 = ailen[row]; 4668 low1 = 0; 4669 high1 = nrow1; 4670 lastcol2 = -1; 4671 rp2 = bj + bi[row]; 4672 ap2 = ba + bi[row]; 4673 rmax2 = bimax[row]; 4674 nrow2 = bilen[row]; 4675 low2 = 0; 4676 high2 = nrow2; 4677 4678 for (j=0; j<n; j++) { 4679 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4680 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4681 if (in[j] >= cstart && in[j] < cend){ 4682 col = in[j] - cstart; 4683 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4684 } else if (in[j] < 0) continue; 4685 #if defined(PETSC_USE_DEBUG) 4686 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4687 #endif 4688 else { 4689 if (mat->was_assembled) { 4690 if (!aij->colmap) { 4691 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4692 } 4693 #if defined (PETSC_USE_CTABLE) 4694 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4695 col--; 4696 #else 4697 col = aij->colmap[in[j]] - 1; 4698 #endif 4699 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4700 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4701 col = in[j]; 4702 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4703 B = aij->B; 4704 b = (Mat_SeqAIJ*)B->data; 4705 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4706 rp2 = bj + bi[row]; 4707 ap2 = ba + bi[row]; 4708 rmax2 = bimax[row]; 4709 nrow2 = bilen[row]; 4710 low2 = 0; 4711 high2 = nrow2; 4712 bm = aij->B->rmap.n; 4713 ba = b->a; 4714 } 4715 } else col = in[j]; 4716 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4717 } 4718 } 4719 } else { 4720 if (!aij->donotstash) { 4721 if (roworiented) { 4722 if (ignorezeroentries && v[i*n] == 0.0) continue; 4723 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4724 } else { 4725 if (ignorezeroentries && v[i] == 0.0) continue; 4726 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4727 } 4728 } 4729 } 4730 }} 4731 PetscFunctionReturnVoid(); 4732 } 4733 EXTERN_C_END 4734 4735