#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode 
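/*
   Note added for clarity (this only restates the convention used by
   CreateColmap_MPIAIJ_Private() above; the concrete picture is an illustration):
   garray[] and colmap are inverse maps between global column numbers and local
   column numbers of the off-diagonal block B:

     garray[lcol] = gcol        local off-diagonal column -> global column
     colmap[gcol] = lcol + 1    global column -> local column + 1, 0 means "not present"
                                (with PETSC_USE_CTABLE the same +1 shift is applied to
                                 both the key and the value stored in the PetscTable)
*/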
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = 
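/* translate the global column in[j] to a local column of B; a stored value of 0 in
   colmap (or a failed table lookup) becomes col = -1, i.e. "column not yet present" */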
aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = 
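/*
   The stash machinery used by MatAssemblyBegin/End_MPIAIJ here supports the usual
   PETSc assembly pattern (a sketch; grow, ncols, gcols and vals are placeholders):
   any process may set entries in rows it does not own, and the stashed entries are
   shipped to the owning process during assembly.

     ierr = MatSetValues(mat,1,&grow,ncols,gcols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/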
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of the submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       
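/*
   Usage sketch for MatZeroRows on an MPIAIJ matrix (the rows are global indices;
   this routine forwards them to their owners, so a process may list rows it does
   not own; the concrete numbers below are only an illustration):

     PetscInt    zrows[] = {0,5,17};
     PetscScalar one     = 1.0;
     ierr = MatZeroRows(A,3,zrows,one);CHKERRQ(ierr);    zeroed rows keep 1.0 on the diagonal
*/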
nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero 
the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = 
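/*
   What the two branches of MatMultTranspose_MPIAIJ here compute (a sketch): with the
   local row block split as [Adiag Boff], the transpose product is

     yy = Adiag^T * xx_local  +  (contributions Boff^T * xx scattered back to their
                                  owners with ADD_VALUES, SCATTER_REVERSE)

   and when the scatter context is not merged, the reverse scatter is started before
   the local multiply so the communication overlaps the computation.
*/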
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 
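/*
   File layout produced by this routine (summarizing the writes that follow):
     header[4]   MAT_FILE_COOKIE, M, N, global number of nonzeros
     M ints      row lengths, gathered rank by rank onto process 0
     nz ints     global column indices, each row merged into increasing column order
     nz scalars  the matrix values in the same order
*/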
header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest amount any processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file 
*/ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_INFO) { 905 PetscInt inodecount,inodelimit,*inodes; 906 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 907 if (inodes) { 908 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 909 } else { 910 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = 
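/*
   Viewer usage these MatView paths serve (a sketch; 'viewer' stands for whatever
   viewer the caller created, e.g. a binary or ASCII viewer):
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
*/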
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 942 if (!rank) { 943 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 944 } else { 945 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 946 } 947 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 948 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 949 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 950 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 951 952 /* copy over the A part */ 953 Aloc = (Mat_SeqAIJ*)aij->A->data; 954 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = mat->rmap.rstart; 956 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 957 for (i=0; i<m; i++) { 958 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 959 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 960 } 961 aj = Aloc->j; 962 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 963 964 /* copy over the B part */ 965 Aloc = (Mat_SeqAIJ*)aij->B->data; 966 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = mat->rmap.rstart; 968 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 969 ct = cols; 970 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 974 } 975 ierr = PetscFree(ct);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the PetscDraw object 981 */ 982 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 if (!rank) { 984 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 985 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 986 } 987 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 988 ierr = MatDestroy(A);CHKERRQ(ierr); 989 } 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatView_MPIAIJ" 995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 996 { 997 PetscErrorCode ierr; 998 PetscTruth iascii,isdraw,issocket,isbinary; 999 1000 PetscFunctionBegin; 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
1005 if (iascii || isdraw || isbinary || issocket) { 1006 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1007 } else { 1008 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatRelax_MPIAIJ" 1015 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1016 { 1017 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1018 PetscErrorCode ierr; 1019 Vec bb1; 1020 1021 PetscFunctionBegin; 1022 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1023 1024 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1025 1026 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1027 if (flag & SOR_ZERO_INITIAL_GUESS) { 1028 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1029 its--; 1030 } 1031 1032 while (its--) { 1033 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1035 1036 /* update rhs: bb1 = bb - B*x */ 1037 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1038 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1039 1040 /* local sweep */ 1041 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1042 CHKERRQ(ierr); 1043 } 1044 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1045 if (flag & SOR_ZERO_INITIAL_GUESS) { 1046 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1047 its--; 1048 } 1049 while (its--) { 1050 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 1053 /* update rhs: bb1 = bb - B*x */ 1054 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1056 1057 /* local sweep */ 1058 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1059 CHKERRQ(ierr); 1060 } 1061 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 while (its--) { 1067 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 1070 /* update rhs: bb1 = bb - B*x */ 1071 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1072 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1073 1074 /* local sweep */ 1075 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1076 CHKERRQ(ierr); 1077 } 1078 } else { 1079 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1080 } 1081 1082 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatPermute_MPIAIJ" 1088 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1089 { 1090 MPI_Comm comm,pcomm; 1091 PetscInt first,local_size,nrows,*rows; 1092 int ntids; 1093 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1094 PetscErrorCode ierr; 1095 
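/*
   For reference, each outer iteration of the local SOR sweeps in MatRelax_MPIAIJ
   above does (a sketch, writing Adiag and Boff for the two local blocks):
     bb1 = bb - Boff * x_ghost          off-process couplings moved to the right-hand side
     x   = relax(Adiag, bb1, omega)     one on-process forward/backward/symmetric sweep
   i.e. one scatter of x followed by purely local relaxation.
*/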
1096 PetscFunctionBegin; 1097 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1098 /* make a collective version of 'rowp' */ 1099 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1100 if (pcomm==comm) { 1101 crowp = rowp; 1102 } else { 1103 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1104 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1105 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1106 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1107 } 1108 /* collect the global row permutation and invert it */ 1109 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1110 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1111 if (pcomm!=comm) { 1112 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1113 } 1114 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1115 /* get the local target indices */ 1116 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1117 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1120 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1121 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1122 /* the column permutation is so much easier; 1123 make a local version of 'colp' and invert it */ 1124 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1125 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1126 if (ntids==1) { 1127 lcolp = colp; 1128 } else { 1129 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1130 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1131 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1132 } 1133 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1134 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1135 if (ntids>1) { 1136 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1137 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1138 } 1139 /* now we just get the submatrix */ 1140 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1141 /* clean up */ 1142 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1143 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1149 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1150 { 1151 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1152 Mat A = mat->A,B = mat->B; 1153 PetscErrorCode ierr; 1154 PetscReal isend[5],irecv[5]; 1155 1156 PetscFunctionBegin; 1157 info->block_size = 1.0; 1158 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1159 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1160 isend[3] = info->memory; isend[4] = info->mallocs; 1161 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1162 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1163 isend[3] += info->memory; isend[4] += info->mallocs; 1164 if (flag == MAT_LOCAL) { 1165 info->nz_used = isend[0]; 1166 info->nz_allocated = isend[1]; 1167 info->nz_unneeded = isend[2]; 1168 info->memory = isend[3]; 1169 info->mallocs = isend[4]; 1170 } else if (flag == MAT_GLOBAL_MAX) { 1171 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1172 info->nz_used = irecv[0]; 1173 info->nz_allocated = irecv[1]; 1174 info->nz_unneeded = irecv[2]; 1175 info->memory = irecv[3]; 1176 info->mallocs = irecv[4]; 1177 } else if (flag == MAT_GLOBAL_SUM) { 
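/*
   Sketch of querying these statistics from user code (the fields are those filled in
   by this routine):
     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     nz_total = (PetscInt)info.nz_used;    summed over all processes for MAT_GLOBAL_SUM
*/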
1178 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1179 info->nz_used = irecv[0]; 1180 info->nz_allocated = irecv[1]; 1181 info->nz_unneeded = irecv[2]; 1182 info->memory = irecv[3]; 1183 info->mallocs = irecv[4]; 1184 } 1185 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1186 info->fill_ratio_needed = 0; 1187 info->factor_mallocs = 0; 1188 info->rows_global = (double)matin->rmap.N; 1189 info->columns_global = (double)matin->cmap.N; 1190 info->rows_local = (double)matin->rmap.n; 1191 info->columns_local = (double)matin->cmap.N; 1192 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatSetOption_MPIAIJ" 1198 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1199 { 1200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 switch (op) { 1205 case MAT_NO_NEW_NONZERO_LOCATIONS: 1206 case MAT_YES_NEW_NONZERO_LOCATIONS: 1207 case MAT_COLUMNS_UNSORTED: 1208 case MAT_COLUMNS_SORTED: 1209 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1210 case MAT_KEEP_ZEROED_ROWS: 1211 case MAT_NEW_NONZERO_LOCATION_ERR: 1212 case MAT_USE_INODES: 1213 case MAT_DO_NOT_USE_INODES: 1214 case MAT_IGNORE_ZERO_ENTRIES: 1215 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1216 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1217 break; 1218 case MAT_ROW_ORIENTED: 1219 a->roworiented = PETSC_TRUE; 1220 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1221 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1222 break; 1223 case MAT_ROWS_SORTED: 1224 case MAT_ROWS_UNSORTED: 1225 case MAT_YES_NEW_DIAGONALS: 1226 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1227 break; 1228 case MAT_COLUMN_ORIENTED: 1229 a->roworiented = PETSC_FALSE; 1230 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1231 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1232 break; 1233 case MAT_IGNORE_OFF_PROC_ENTRIES: 1234 a->donotstash = PETSC_TRUE; 1235 break; 1236 case MAT_NO_NEW_DIAGONALS: 1237 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1238 case MAT_SYMMETRIC: 1239 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1240 break; 1241 case MAT_STRUCTURALLY_SYMMETRIC: 1242 case MAT_HERMITIAN: 1243 case MAT_SYMMETRY_ETERNAL: 1244 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1245 break; 1246 case MAT_NOT_SYMMETRIC: 1247 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1248 case MAT_NOT_HERMITIAN: 1249 case MAT_NOT_SYMMETRY_ETERNAL: 1250 break; 1251 default: 1252 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatGetRow_MPIAIJ" 1259 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1260 { 1261 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1262 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1263 PetscErrorCode ierr; 1264 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1265 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1266 PetscInt *cmap,*idx_p; 1267 1268 PetscFunctionBegin; 1269 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1270 mat->getrowactive = PETSC_TRUE; 1271 1272 if (!mat->rowvalues && (idx || v)) { 1273 /* 1274 allocate enough space to hold information from the longest row. 
1275 */ 1276 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1277 PetscInt max = 1,tmp; 1278 for (i=0; i<matin->rmap.n; i++) { 1279 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1280 if (max < tmp) { max = tmp; } 1281 } 1282 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1283 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1284 } 1285 1286 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1287 lrow = row - rstart; 1288 1289 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1290 if (!v) {pvA = 0; pvB = 0;} 1291 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1292 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1293 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1294 nztot = nzA + nzB; 1295 1296 cmap = mat->garray; 1297 if (v || idx) { 1298 if (nztot) { 1299 /* Sort by increasing column numbers, assuming A and B already sorted */ 1300 PetscInt imark = -1; 1301 if (v) { 1302 *v = v_p = mat->rowvalues; 1303 for (i=0; i<nzB; i++) { 1304 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1305 else break; 1306 } 1307 imark = i; 1308 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1309 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1310 } 1311 if (idx) { 1312 *idx = idx_p = mat->rowindices; 1313 if (imark > -1) { 1314 for (i=0; i<imark; i++) { 1315 idx_p[i] = cmap[cworkB[i]]; 1316 } 1317 } else { 1318 for (i=0; i<nzB; i++) { 1319 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1320 else break; 1321 } 1322 imark = i; 1323 } 1324 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1325 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1326 } 1327 } else { 1328 if (idx) *idx = 0; 1329 if (v) *v = 0; 1330 } 1331 } 1332 *nz = nztot; 1333 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1334 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1340 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1341 { 1342 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1343 1344 PetscFunctionBegin; 1345 if (!aij->getrowactive) { 1346 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1347 } 1348 aij->getrowactive = PETSC_FALSE; 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatNorm_MPIAIJ" 1354 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1355 { 1356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1357 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1358 PetscErrorCode ierr; 1359 PetscInt i,j,cstart = mat->cmap.rstart; 1360 PetscReal sum = 0.0; 1361 PetscScalar *v; 1362 1363 PetscFunctionBegin; 1364 if (aij->size == 1) { 1365 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1366 } else { 1367 if (type == NORM_FROBENIUS) { 1368 v = amat->a; 1369 for (i=0; i<amat->nz; i++) { 1370 #if defined(PETSC_USE_COMPLEX) 1371 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1372 #else 1373 sum += (*v)*(*v); v++; 1374 #endif 1375 } 1376 v = bmat->a; 1377 for (i=0; i<bmat->nz; i++) { 1378 #if defined(PETSC_USE_COMPLEX) 1379 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1380 #else 1381 sum += (*v)*(*v); v++; 1382 #endif 1383 } 1384 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1385 *norm = sqrt(*norm); 
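/*
   Norms computed by this routine (a sketch, writing a_ij for the global matrix entries):
     NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 ),  local sums of squares reduced with MPI_SUM
     NORM_1:         max_j sum_i |a_ij|,       per-column sums reduced with MPI_SUM, then max
     NORM_INFINITY:  max_i sum_j |a_ij|,       per-row sums reduced with MPI_MAX
*/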
1386 } else if (type == NORM_1) { /* max column norm */ 1387 PetscReal *tmp,*tmp2; 1388 PetscInt *jj,*garray = aij->garray; 1389 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1391 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1392 *norm = 0.0; 1393 v = amat->a; jj = amat->j; 1394 for (j=0; j<amat->nz; j++) { 1395 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1396 } 1397 v = bmat->a; jj = bmat->j; 1398 for (j=0; j<bmat->nz; j++) { 1399 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1400 } 1401 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1402 for (j=0; j<mat->cmap.N; j++) { 1403 if (tmp2[j] > *norm) *norm = tmp2[j]; 1404 } 1405 ierr = PetscFree(tmp);CHKERRQ(ierr); 1406 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1407 } else if (type == NORM_INFINITY) { /* max row norm */ 1408 PetscReal ntemp = 0.0; 1409 for (j=0; j<aij->A->rmap.n; j++) { 1410 v = amat->a + amat->i[j]; 1411 sum = 0.0; 1412 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1413 sum += PetscAbsScalar(*v); v++; 1414 } 1415 v = bmat->a + bmat->i[j]; 1416 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1417 sum += PetscAbsScalar(*v); v++; 1418 } 1419 if (sum > ntemp) ntemp = sum; 1420 } 1421 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1422 } else { 1423 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1424 } 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatTranspose_MPIAIJ" 1431 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1432 { 1433 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1434 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1435 PetscErrorCode ierr; 1436 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1437 PetscInt cstart=A->cmap.rstart,ncol; 1438 Mat B; 1439 PetscScalar *array; 1440 1441 PetscFunctionBegin; 1442 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1443 1444 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1445 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1446 ai = Aloc->i; aj = Aloc->j; 1447 bi = Bloc->i; bj = Bloc->j; 1448 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1449 cols = d_nnz + na + 1; /* work space to be used by B part */ 1450 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1451 for (i=0; i<ai[ma]; i++){ 1452 d_nnz[aj[i]] ++; 1453 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1454 } 1455 1456 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1457 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1458 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1459 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1460 1461 /* copy over the A part */ 1462 array = Aloc->a; 1463 row = A->rmap.rstart; 1464 for (i=0; i<ma; i++) { 1465 ncol = ai[i+1]-ai[i]; 1466 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1467 row++; array += ncol; aj += ncol; 1468 } 1469 aj = Aloc->j; 1470 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1471 1472 /* copy over the B part */ 1473 array = Bloc->a; 1474 row = A->rmap.rstart; 1475 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1476 for (i=0; i<mb; i++) { 1477 ncol = bi[i+1]-bi[i]; 1478 ierr = 
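/*
   Usage sketch for this transpose routine through the public interface (the in-place
   form is only legal for square matrices, as checked at the top of the function):
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);           new matrix At = A^T
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);    replace A by A^T in place
*/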
MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1479 row++; array += ncol; cols += ncol; 1480 } 1481 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1482 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1483 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1484 if (matout) { 1485 *matout = B; 1486 } else { 1487 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } 1491 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1494 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1495 { 1496 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1497 Mat a = aij->A,b = aij->B; 1498 PetscErrorCode ierr; 1499 PetscInt s1,s2,s3; 1500 1501 PetscFunctionBegin; 1502 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1503 if (rr) { 1504 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1505 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1506 /* Overlap communication with computation. */ 1507 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1508 } 1509 if (ll) { 1510 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1511 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1512 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1513 } 1514 /* scale the diagonal block */ 1515 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1516 1517 if (rr) { 1518 /* Do a scatter end and then right scale the off-diagonal block */ 1519 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1520 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1521 } 1522 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1528 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1529 { 1530 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1535 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1540 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1541 { 1542 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 1545 PetscFunctionBegin; 1546 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatEqual_MPIAIJ" 1552 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1553 { 1554 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1555 Mat a,b,c,d; 1556 PetscTruth flg; 1557 PetscErrorCode ierr; 1558 1559 PetscFunctionBegin; 1560 a = matA->A; b = matA->B; 1561 c = matB->A; d = matB->B; 1562 1563 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1564 if (flg) { 1565 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1566 } 1567 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatCopy_MPIAIJ" 1573 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1574 { 1575 PetscErrorCode ierr; 1576 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1577 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1578 1579 PetscFunctionBegin; 1580 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1581 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1582 /* because of the column compression in the off-processor part of the matrix a->B, 1583 the number of columns in a->B and b->B may be different, hence we cannot call 1584 the MatCopy() directly on the two parts. If need be, we can provide a more 1585 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1586 then copying the submatrices */ 1587 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1588 } else { 1589 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1590 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1591 } 1592 PetscFunctionReturn(0); 1593 } 1594 1595 #undef __FUNCT__ 1596 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1597 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1598 { 1599 PetscErrorCode ierr; 1600 1601 PetscFunctionBegin; 1602 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #include "petscblaslapack.h" 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatAXPY_MPIAIJ" 1609 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1610 { 1611 PetscErrorCode ierr; 1612 PetscInt i; 1613 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1614 PetscBLASInt bnz,one=1; 1615 Mat_SeqAIJ *x,*y; 1616 1617 PetscFunctionBegin; 1618 if (str == SAME_NONZERO_PATTERN) { 1619 PetscScalar alpha = a; 1620 x = (Mat_SeqAIJ *)xx->A->data; 1621 y = (Mat_SeqAIJ *)yy->A->data; 1622 bnz = (PetscBLASInt)x->nz; 1623 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1624 x = (Mat_SeqAIJ *)xx->B->data; 1625 y = (Mat_SeqAIJ *)yy->B->data; 1626 bnz = (PetscBLASInt)x->nz; 1627 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1628 } else if (str == SUBSET_NONZERO_PATTERN) { 1629 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1630 1631 x = (Mat_SeqAIJ *)xx->B->data; 1632 y = (Mat_SeqAIJ *)yy->B->data; 1633 if (y->xtoy && y->XtoY != xx->B) { 1634 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1635 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1636 } 1637 if (!y->xtoy) { /* get xtoy */ 1638 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1639 y->XtoY = xx->B; 1640 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1641 } 1642 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1643 } else { 1644 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1645 } 1646 PetscFunctionReturn(0); 1647 } 1648 1649 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatConjugate_MPIAIJ" 1653 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1654 { 1655 #if defined(PETSC_USE_COMPLEX) 1656 PetscErrorCode ierr; 1657 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1658 1659 PetscFunctionBegin; 1660 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1661 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1662 #else 1663 PetscFunctionBegin; 1664 #endif 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatRealPart_MPIAIJ" 1670 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1671 { 1672 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1677 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1683 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1684 { 1685 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1686 PetscErrorCode ierr; 1687 1688 PetscFunctionBegin; 1689 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1690 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #ifdef PETSC_HAVE_PBGL 1695 1696 #include <boost/parallel/mpi/bsp_process_group.hpp> 1697 #include <boost/graph/distributed/ilu_default_graph.hpp> 1698 #include <boost/graph/distributed/ilu_0_block.hpp> 1699 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1700 #include <boost/graph/distributed/petsc/interface.hpp> 1701 #include <boost/multi_array.hpp> 1702 #include <boost/parallel/distributed_property_map.hpp> 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1706 /* 1707 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1708 */ 1709 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1710 { 1711 namespace petsc = boost::distributed::petsc; 1712 1713 namespace graph_dist = boost::graph::distributed; 1714 using boost::graph::distributed::ilu_default::process_group_type; 1715 using boost::graph::ilu_permuted; 1716 1717 PetscTruth row_identity, col_identity; 1718 PetscContainer c; 1719 PetscInt m, n, M, N; 1720 PetscErrorCode ierr; 1721 1722 PetscFunctionBegin; 1723 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1724 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1725 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1726 if (!row_identity || !col_identity) { 1727 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1728 } 1729 1730 process_group_type pg; 1731 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1732 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1733 lgraph_type& level_graph = *lgraph_p; 1734 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1735 1736 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1737 ilu_permuted(level_graph); 1738 1739 /* put together the new matrix */ 1740 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1741 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1742 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1743 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1744 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1745 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1746 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1747 (*fact)->factor = FACTOR_LU; 1748 1749 ierr = PetscContainerCreate(A->comm, &c); 1750 ierr = PetscContainerSetPointer(c, lgraph_p); 1751 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1757 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1758 { 1759 PetscFunctionBegin; 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatSolve_MPIAIJ" 1765 /* 1766 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1767 */ 1768 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1769 { 1770 namespace graph_dist = boost::graph::distributed; 1771 1772 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1773 lgraph_type* lgraph_p; 1774 PetscContainer c; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 ierr = 
PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1779 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1780 ierr = VecCopy(b, x); CHKERRQ(ierr); 1781 1782 PetscScalar* array_x; 1783 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1784 PetscInt sx; 1785 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1786 1787 PetscScalar* array_b; 1788 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1789 PetscInt sb; 1790 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1791 1792 lgraph_type& level_graph = *lgraph_p; 1793 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1794 1795 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1796 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1797 ref_x(array_x, boost::extents[num_vertices(graph)]); 1798 1799 typedef boost::iterator_property_map<array_ref_type::iterator, 1800 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1801 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1802 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1803 1804 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1805 1806 PetscFunctionReturn(0); 1807 } 1808 #endif 1809 1810 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1811 PetscInt nzlocal,nsends,nrecvs; 1812 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1813 PetscScalar *sbuf_a,**rbuf_a; 1814 PetscErrorCode (*MatDestroy)(Mat); 1815 } Mat_Redundant; 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1819 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1820 { 1821 PetscErrorCode ierr; 1822 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1823 PetscInt i; 1824 1825 PetscFunctionBegin; 1826 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1827 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1828 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1829 for (i=0; i<redund->nrecvs; i++){ 1830 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1831 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1832 } 1833 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1834 ierr = PetscFree(redund);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatDestroy_MatRedundant" 1840 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1841 { 1842 PetscErrorCode ierr; 1843 PetscContainer container; 1844 Mat_Redundant *redund=PETSC_NULL; 1845 1846 PetscFunctionBegin; 1847 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1848 if (container) { 1849 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1850 } else { 1851 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1852 } 1853 A->ops->destroy = redund->MatDestroy; 1854 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1855 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1856 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1862 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1863 { 1864 PetscMPIInt rank,size; 1865 MPI_Comm comm=mat->comm; 1866 PetscErrorCode ierr; 1867 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1868 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1869 PetscInt 
*rowrange=mat->rmap.range; 1870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1871 Mat A=aij->A,B=aij->B,C=*matredundant; 1872 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1873 PetscScalar *sbuf_a; 1874 PetscInt nzlocal=a->nz+b->nz; 1875 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1876 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1877 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1878 PetscScalar *vals,*aworkA,*aworkB; 1879 PetscMPIInt tag1,tag2,tag3,imdex; 1880 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1881 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1882 MPI_Status recv_status,*send_status; 1883 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1884 PetscInt **rbuf_j=PETSC_NULL; 1885 PetscScalar **rbuf_a=PETSC_NULL; 1886 Mat_Redundant *redund=PETSC_NULL; 1887 PetscContainer container; 1888 1889 PetscFunctionBegin; 1890 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1891 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1892 1893 if (reuse == MAT_REUSE_MATRIX) { 1894 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1895 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1896 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1897 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1898 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1899 if (container) { 1900 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1901 } else { 1902 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1903 } 1904 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 1905 1906 nsends = redund->nsends; 1907 nrecvs = redund->nrecvs; 1908 send_rank = redund->send_rank; recv_rank = send_rank + size; 1909 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1910 sbuf_j = redund->sbuf_j; 1911 sbuf_a = redund->sbuf_a; 1912 rbuf_j = redund->rbuf_j; 1913 rbuf_a = redund->rbuf_a; 1914 } 1915 1916 if (reuse == MAT_INITIAL_MATRIX){ 1917 PetscMPIInt subrank,subsize; 1918 PetscInt nleftover,np_subcomm; 1919 /* get the destination processors' id send_rank, nsends and nrecvs */ 1920 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1921 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1922 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1923 recv_rank = send_rank + size; 1924 np_subcomm = size/nsubcomm; 1925 nleftover = size - nsubcomm*np_subcomm; 1926 nsends = 0; nrecvs = 0; 1927 for (i=0; i<size; i++){ /* i=rank*/ 1928 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1929 send_rank[nsends] = i; nsends++; 1930 recv_rank[nrecvs++] = i; 1931 } 1932 } 1933 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1934 i = size-nleftover-1; 1935 j = 0; 1936 while (j < nsubcomm - nleftover){ 1937 send_rank[nsends++] = i; 1938 i--; j++; 1939 } 1940 } 1941 1942 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1943 for (i=0; i<nleftover; i++){ 1944 recv_rank[nrecvs++] = size-nleftover+i; 1945 } 1946 } 1947 1948 /* allocate sbuf_j, sbuf_a */ 1949 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1950 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1951 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1952 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1953 1954 /* copy mat's local entries into the buffers */ 1955 if (reuse == MAT_INITIAL_MATRIX){ 1956 rownz_max = 0; 1957 rptr = sbuf_j; 1958 cols = sbuf_j + rend-rstart + 1; 1959 vals = sbuf_a; 1960 rptr[0] = 0; 1961 for (i=0; i<rend-rstart; i++){ 1962 row = i + rstart; 1963 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1964 ncols = nzA + nzB; 1965 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1966 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1967 /* load the column indices for this row into cols */ 1968 lwrite = 0; 1969 for (l=0; l<nzB; l++) { 1970 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1971 vals[lwrite] = aworkB[l]; 1972 cols[lwrite++] = ctmp; 1973 } 1974 } 1975 for (l=0; l<nzA; l++){ 1976 vals[lwrite] = aworkA[l]; 1977 cols[lwrite++] = cstart + cworkA[l]; 1978 } 1979 for (l=0; l<nzB; l++) { 1980 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1981 vals[lwrite] = aworkB[l]; 1982 cols[lwrite++] = ctmp; 1983 } 1984 } 1985 vals += ncols; 1986 cols += ncols; 1987 rptr[i+1] = rptr[i] + ncols; 1988 if (rownz_max < ncols) rownz_max = ncols; 1989 } 1990 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1991 } else { /* only copy matrix values into sbuf_a */ 1992 rptr = sbuf_j; 1993 vals = sbuf_a; 1994 rptr[0] = 0; 1995 for (i=0; i<rend-rstart; i++){ 1996 row = i + rstart; 1997 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1998 ncols = nzA + nzB; 1999 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2000 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2001 lwrite = 0; 2002 for (l=0; l<nzB; l++) { 2003 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2004 } 2005 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2006 for (l=0; l<nzB; l++) { 
2007 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2008 } 2009 vals += ncols; 2010 rptr[i+1] = rptr[i] + ncols; 2011 } 2012 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2013 2014 /* send nzlocal to others, and recv other's nzlocal */ 2015 /*--------------------------------------------------*/ 2016 if (reuse == MAT_INITIAL_MATRIX){ 2017 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2018 s_waits2 = s_waits3 + nsends; 2019 s_waits1 = s_waits2 + nsends; 2020 r_waits1 = s_waits1 + nsends; 2021 r_waits2 = r_waits1 + nrecvs; 2022 r_waits3 = r_waits2 + nrecvs; 2023 } else { 2024 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2025 r_waits3 = s_waits3 + nsends; 2026 } 2027 2028 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2029 if (reuse == MAT_INITIAL_MATRIX){ 2030 /* get new tags to keep the communication clean */ 2031 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2032 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2033 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2034 rbuf_nz = sbuf_nz + nsends; 2035 2036 /* post receives of other's nzlocal */ 2037 for (i=0; i<nrecvs; i++){ 2038 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2039 } 2040 /* send nzlocal to others */ 2041 for (i=0; i<nsends; i++){ 2042 sbuf_nz[i] = nzlocal; 2043 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2044 } 2045 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2046 count = nrecvs; 2047 while (count) { 2048 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2049 recv_rank[imdex] = recv_status.MPI_SOURCE; 2050 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2051 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2052 2053 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2054 rbuf_nz[imdex] += i + 2; 2055 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2056 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2057 count--; 2058 } 2059 /* wait on sends of nzlocal */ 2060 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2061 /* send mat->i,j to others, and recv from other's */ 2062 /*------------------------------------------------*/ 2063 for (i=0; i<nsends; i++){ 2064 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2065 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2066 } 2067 /* wait on receives of mat->i,j */ 2068 /*------------------------------*/ 2069 count = nrecvs; 2070 while (count) { 2071 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2072 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2073 count--; 2074 } 2075 /* wait on sends of mat->i,j */ 2076 /*---------------------------*/ 2077 if (nsends) { 2078 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2079 } 2080 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2081 2082 /* post receives, send and receive mat->a */ 2083 /*----------------------------------------*/ 
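/* both the MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX paths arrive here: post the
     receives for the numerical values (rbuf_a), send this process's sbuf_a to every
     rank in send_rank[], then wait on all requests; in the reuse case the buffers and
     send/recv ranks were recovered from the Mat_Redundant container above, so only
     the values need to be communicated again */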
2084 for (imdex=0; imdex<nrecvs; imdex++) { 2085 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2086 } 2087 for (i=0; i<nsends; i++){ 2088 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2089 } 2090 count = nrecvs; 2091 while (count) { 2092 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2093 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2094 count--; 2095 } 2096 if (nsends) { 2097 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2098 } 2099 2100 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2101 2102 /* create redundant matrix */ 2103 /*-------------------------*/ 2104 if (reuse == MAT_INITIAL_MATRIX){ 2105 /* compute rownz_max for preallocation */ 2106 for (imdex=0; imdex<nrecvs; imdex++){ 2107 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2108 rptr = rbuf_j[imdex]; 2109 for (i=0; i<j; i++){ 2110 ncols = rptr[i+1] - rptr[i]; 2111 if (rownz_max < ncols) rownz_max = ncols; 2112 } 2113 } 2114 2115 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2116 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2117 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2118 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2119 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2120 } else { 2121 C = *matredundant; 2122 } 2123 2124 /* insert local matrix entries */ 2125 rptr = sbuf_j; 2126 cols = sbuf_j + rend-rstart + 1; 2127 vals = sbuf_a; 2128 for (i=0; i<rend-rstart; i++){ 2129 row = i + rstart; 2130 ncols = rptr[i+1] - rptr[i]; 2131 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2132 vals += ncols; 2133 cols += ncols; 2134 } 2135 /* insert received matrix entries */ 2136 for (imdex=0; imdex<nrecvs; imdex++){ 2137 rstart = rowrange[recv_rank[imdex]]; 2138 rend = rowrange[recv_rank[imdex]+1]; 2139 rptr = rbuf_j[imdex]; 2140 cols = rbuf_j[imdex] + rend-rstart + 1; 2141 vals = rbuf_a[imdex]; 2142 for (i=0; i<rend-rstart; i++){ 2143 row = i + rstart; 2144 ncols = rptr[i+1] - rptr[i]; 2145 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2146 vals += ncols; 2147 cols += ncols; 2148 } 2149 } 2150 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2152 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2153 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2154 if (reuse == MAT_INITIAL_MATRIX){ 2155 PetscContainer container; 2156 *matredundant = C; 2157 /* create a supporting struct and attach it to C for reuse */ 2158 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 2159 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2160 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2161 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2162 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2163 2164 redund->nzlocal = nzlocal; 2165 redund->nsends = nsends; 2166 redund->nrecvs = nrecvs; 2167 redund->send_rank = send_rank; 2168 redund->sbuf_nz = sbuf_nz; 2169 redund->sbuf_j = sbuf_j; 2170 redund->sbuf_a = sbuf_a; 2171 redund->rbuf_j = rbuf_j; 
2172 redund->rbuf_a = rbuf_a; 2173 2174 redund->MatDestroy = C->ops->destroy; 2175 C->ops->destroy = MatDestroy_MatRedundant; 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 /* -------------------------------------------------------------------*/ 2181 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2182 MatGetRow_MPIAIJ, 2183 MatRestoreRow_MPIAIJ, 2184 MatMult_MPIAIJ, 2185 /* 4*/ MatMultAdd_MPIAIJ, 2186 MatMultTranspose_MPIAIJ, 2187 MatMultTransposeAdd_MPIAIJ, 2188 #ifdef PETSC_HAVE_PBGL 2189 MatSolve_MPIAIJ, 2190 #else 2191 0, 2192 #endif 2193 0, 2194 0, 2195 /*10*/ 0, 2196 0, 2197 0, 2198 MatRelax_MPIAIJ, 2199 MatTranspose_MPIAIJ, 2200 /*15*/ MatGetInfo_MPIAIJ, 2201 MatEqual_MPIAIJ, 2202 MatGetDiagonal_MPIAIJ, 2203 MatDiagonalScale_MPIAIJ, 2204 MatNorm_MPIAIJ, 2205 /*20*/ MatAssemblyBegin_MPIAIJ, 2206 MatAssemblyEnd_MPIAIJ, 2207 0, 2208 MatSetOption_MPIAIJ, 2209 MatZeroEntries_MPIAIJ, 2210 /*25*/ MatZeroRows_MPIAIJ, 2211 0, 2212 #ifdef PETSC_HAVE_PBGL 2213 MatLUFactorNumeric_MPIAIJ, 2214 #else 2215 0, 2216 #endif 2217 0, 2218 0, 2219 /*30*/ MatSetUpPreallocation_MPIAIJ, 2220 #ifdef PETSC_HAVE_PBGL 2221 MatILUFactorSymbolic_MPIAIJ, 2222 #else 2223 0, 2224 #endif 2225 0, 2226 0, 2227 0, 2228 /*35*/ MatDuplicate_MPIAIJ, 2229 0, 2230 0, 2231 0, 2232 0, 2233 /*40*/ MatAXPY_MPIAIJ, 2234 MatGetSubMatrices_MPIAIJ, 2235 MatIncreaseOverlap_MPIAIJ, 2236 MatGetValues_MPIAIJ, 2237 MatCopy_MPIAIJ, 2238 /*45*/ 0, 2239 MatScale_MPIAIJ, 2240 0, 2241 0, 2242 0, 2243 /*50*/ MatSetBlockSize_MPIAIJ, 2244 0, 2245 0, 2246 0, 2247 0, 2248 /*55*/ MatFDColoringCreate_MPIAIJ, 2249 0, 2250 MatSetUnfactored_MPIAIJ, 2251 MatPermute_MPIAIJ, 2252 0, 2253 /*60*/ MatGetSubMatrix_MPIAIJ, 2254 MatDestroy_MPIAIJ, 2255 MatView_MPIAIJ, 2256 0, 2257 0, 2258 /*65*/ 0, 2259 0, 2260 0, 2261 0, 2262 0, 2263 /*70*/ 0, 2264 0, 2265 MatSetColoring_MPIAIJ, 2266 #if defined(PETSC_HAVE_ADIC) 2267 MatSetValuesAdic_MPIAIJ, 2268 #else 2269 0, 2270 #endif 2271 MatSetValuesAdifor_MPIAIJ, 2272 /*75*/ 0, 2273 0, 2274 0, 2275 0, 2276 0, 2277 /*80*/ 0, 2278 0, 2279 0, 2280 0, 2281 /*84*/ MatLoad_MPIAIJ, 2282 0, 2283 0, 2284 0, 2285 0, 2286 0, 2287 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2288 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2289 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2290 MatPtAP_Basic, 2291 MatPtAPSymbolic_MPIAIJ, 2292 /*95*/ MatPtAPNumeric_MPIAIJ, 2293 0, 2294 0, 2295 0, 2296 0, 2297 /*100*/0, 2298 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2299 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2300 MatConjugate_MPIAIJ, 2301 0, 2302 /*105*/MatSetValuesRow_MPIAIJ, 2303 MatRealPart_MPIAIJ, 2304 MatImaginaryPart_MPIAIJ, 2305 0, 2306 0, 2307 /*110*/0, 2308 MatGetRedundantMatrix_MPIAIJ}; 2309 2310 /* ----------------------------------------------------------------------------------------*/ 2311 2312 EXTERN_C_BEGIN 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2315 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2316 { 2317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2322 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 EXTERN_C_END 2326 2327 EXTERN_C_BEGIN 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2330 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2331 { 2332 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2337 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 EXTERN_C_END 2341 2342 #include "petscpc.h" 2343 EXTERN_C_BEGIN 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2346 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2347 { 2348 Mat_MPIAIJ *b; 2349 PetscErrorCode ierr; 2350 PetscInt i; 2351 2352 PetscFunctionBegin; 2353 B->preallocated = PETSC_TRUE; 2354 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2355 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2356 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2357 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2358 2359 B->rmap.bs = B->cmap.bs = 1; 2360 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2361 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2362 if (d_nnz) { 2363 for (i=0; i<B->rmap.n; i++) { 2364 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2365 } 2366 } 2367 if (o_nnz) { 2368 for (i=0; i<B->rmap.n; i++) { 2369 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2370 } 2371 } 2372 b = (Mat_MPIAIJ*)B->data; 2373 2374 /* Explicitly create 2 MATSEQAIJ matrices. */ 2375 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2376 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2377 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2378 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2379 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2380 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2381 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2382 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2383 2384 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2385 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2386 2387 PetscFunctionReturn(0); 2388 } 2389 EXTERN_C_END 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2393 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2394 { 2395 Mat mat; 2396 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2397 PetscErrorCode ierr; 2398 2399 PetscFunctionBegin; 2400 *newmat = 0; 2401 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2402 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2403 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2404 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2405 a = (Mat_MPIAIJ*)mat->data; 2406 2407 mat->factor = matin->factor; 2408 mat->rmap.bs = matin->rmap.bs; 2409 mat->assembled = PETSC_TRUE; 2410 mat->insertmode = NOT_SET_VALUES; 2411 mat->preallocated = PETSC_TRUE; 2412 2413 a->size = oldmat->size; 2414 a->rank = oldmat->rank; 2415 a->donotstash = oldmat->donotstash; 2416 a->roworiented = oldmat->roworiented; 2417 a->rowindices = 0; 2418 a->rowvalues = 0; 2419 a->getrowactive = PETSC_FALSE; 2420 2421 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2422 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2423 2424 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2425 if (oldmat->colmap) { 2426 #if defined (PETSC_USE_CTABLE) 2427 ierr = 
PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2428 #else 2429 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2430 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2431 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2432 #endif 2433 } else a->colmap = 0; 2434 if (oldmat->garray) { 2435 PetscInt len; 2436 len = oldmat->B->cmap.n; 2437 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2438 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2439 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2440 } else a->garray = 0; 2441 2442 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2443 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2444 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2445 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2446 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2447 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2448 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2449 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2450 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2451 *newmat = mat; 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #include "petscsys.h" 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatLoad_MPIAIJ" 2459 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2460 { 2461 Mat A; 2462 PetscScalar *vals,*svals; 2463 MPI_Comm comm = ((PetscObject)viewer)->comm; 2464 MPI_Status status; 2465 PetscErrorCode ierr; 2466 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2467 PetscInt i,nz,j,rstart,rend,mmax; 2468 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2469 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2470 PetscInt cend,cstart,n,*rowners; 2471 int fd; 2472 2473 PetscFunctionBegin; 2474 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2475 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2476 if (!rank) { 2477 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2478 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2479 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2480 } 2481 2482 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2483 M = header[1]; N = header[2]; 2484 /* determine ownership of all rows */ 2485 m = M/size + ((M % size) > rank); 2486 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2487 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2488 2489 /* First process needs enough room for process with most rows */ 2490 if (!rank) { 2491 mmax = rowners[1]; 2492 for (i=2; i<size; i++) { 2493 mmax = PetscMax(mmax,rowners[i]); 2494 } 2495 } else mmax = m; 2496 2497 rowners[0] = 0; 2498 for (i=2; i<=size; i++) { 2499 rowners[i] += rowners[i-1]; 2500 } 2501 rstart = rowners[rank]; 2502 rend = rowners[rank+1]; 2503 2504 /* distribute row lengths to all processors */ 2505 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2506 if (!rank) { 2507 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2508 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2509 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2510 ierr = 
PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2511 for (j=0; j<m; j++) { 2512 procsnz[0] += ourlens[j]; 2513 } 2514 for (i=1; i<size; i++) { 2515 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2516 /* calculate the number of nonzeros on each processor */ 2517 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2518 procsnz[i] += rowlengths[j]; 2519 } 2520 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2521 } 2522 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2523 } else { 2524 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2525 } 2526 2527 if (!rank) { 2528 /* determine max buffer needed and allocate it */ 2529 maxnz = 0; 2530 for (i=0; i<size; i++) { 2531 maxnz = PetscMax(maxnz,procsnz[i]); 2532 } 2533 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2534 2535 /* read in my part of the matrix column indices */ 2536 nz = procsnz[0]; 2537 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2538 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2539 2540 /* read in every one elses and ship off */ 2541 for (i=1; i<size; i++) { 2542 nz = procsnz[i]; 2543 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2544 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2545 } 2546 ierr = PetscFree(cols);CHKERRQ(ierr); 2547 } else { 2548 /* determine buffer space needed for message */ 2549 nz = 0; 2550 for (i=0; i<m; i++) { 2551 nz += ourlens[i]; 2552 } 2553 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2554 2555 /* receive message of column indices*/ 2556 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2557 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2558 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2559 } 2560 2561 /* determine column ownership if matrix is not square */ 2562 if (N != M) { 2563 n = N/size + ((N % size) > rank); 2564 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2565 cstart = cend - n; 2566 } else { 2567 cstart = rstart; 2568 cend = rend; 2569 n = cend - cstart; 2570 } 2571 2572 /* loop over local rows, determining number of off diagonal entries */ 2573 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2574 jj = 0; 2575 for (i=0; i<m; i++) { 2576 for (j=0; j<ourlens[i]; j++) { 2577 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2578 jj++; 2579 } 2580 } 2581 2582 /* create our matrix */ 2583 for (i=0; i<m; i++) { 2584 ourlens[i] -= offlens[i]; 2585 } 2586 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2587 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2588 ierr = MatSetType(A,type);CHKERRQ(ierr); 2589 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2590 2591 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2592 for (i=0; i<m; i++) { 2593 ourlens[i] += offlens[i]; 2594 } 2595 2596 if (!rank) { 2597 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2598 2599 /* read in my part of the matrix numerical values */ 2600 nz = procsnz[0]; 2601 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2602 2603 /* insert into matrix */ 2604 jj = rstart; 2605 smycols = mycols; 2606 svals = vals; 2607 for (i=0; i<m; i++) { 2608 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2609 smycols += ourlens[i]; 2610 svals += ourlens[i]; 2611 jj++; 2612 } 2613 2614 /* read in other processors and ship out */ 2615 for 
(i=1; i<size; i++) { 2616 nz = procsnz[i]; 2617 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2618 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2619 } 2620 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2621 } else { 2622 /* receive numeric values */ 2623 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2624 2625 /* receive message of values*/ 2626 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2627 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2628 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2629 2630 /* insert into matrix */ 2631 jj = rstart; 2632 smycols = mycols; 2633 svals = vals; 2634 for (i=0; i<m; i++) { 2635 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2636 smycols += ourlens[i]; 2637 svals += ourlens[i]; 2638 jj++; 2639 } 2640 } 2641 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2642 ierr = PetscFree(vals);CHKERRQ(ierr); 2643 ierr = PetscFree(mycols);CHKERRQ(ierr); 2644 ierr = PetscFree(rowners);CHKERRQ(ierr); 2645 2646 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2647 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 *newmat = A; 2649 PetscFunctionReturn(0); 2650 } 2651 2652 #undef __FUNCT__ 2653 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2654 /* 2655 Not great since it makes two copies of the submatrix, first an SeqAIJ 2656 in local and then by concatenating the local matrices the end result. 2657 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2658 */ 2659 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2660 { 2661 PetscErrorCode ierr; 2662 PetscMPIInt rank,size; 2663 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2664 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2665 Mat *local,M,Mreuse; 2666 PetscScalar *vwork,*aa; 2667 MPI_Comm comm = mat->comm; 2668 Mat_SeqAIJ *aij; 2669 2670 2671 PetscFunctionBegin; 2672 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2673 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2674 2675 if (call == MAT_REUSE_MATRIX) { 2676 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2677 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2678 local = &Mreuse; 2679 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2680 } else { 2681 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2682 Mreuse = *local; 2683 ierr = PetscFree(local);CHKERRQ(ierr); 2684 } 2685 2686 /* 2687 m - number of local rows 2688 n - number of columns (same on all processors) 2689 rstart - first row in new global matrix generated 2690 */ 2691 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2692 if (call == MAT_INITIAL_MATRIX) { 2693 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2694 ii = aij->i; 2695 jj = aij->j; 2696 2697 /* 2698 Determine the number of non-zeros in the diagonal and off-diagonal 2699 portions of the matrix in order to do correct preallocation 2700 */ 2701 2702 /* first get start and end of "diagonal" columns */ 2703 if (csize == PETSC_DECIDE) { 2704 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2705 if (mglobal == n) { /* square matrix */ 2706 nlocal = m; 2707 } else { 2708 nlocal = n/size + ((n % size) > rank); 2709 } 2710 } else { 2711 nlocal = csize; 2712 } 2713 ierr = 
MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2714 rstart = rend - nlocal; 2715 if (rank == size - 1 && rend != n) { 2716 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2717 } 2718 2719 /* next, compute all the lengths */ 2720 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2721 olens = dlens + m; 2722 for (i=0; i<m; i++) { 2723 jend = ii[i+1] - ii[i]; 2724 olen = 0; 2725 dlen = 0; 2726 for (j=0; j<jend; j++) { 2727 if (*jj < rstart || *jj >= rend) olen++; 2728 else dlen++; 2729 jj++; 2730 } 2731 olens[i] = olen; 2732 dlens[i] = dlen; 2733 } 2734 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2735 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2736 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2737 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2738 ierr = PetscFree(dlens);CHKERRQ(ierr); 2739 } else { 2740 PetscInt ml,nl; 2741 2742 M = *newmat; 2743 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2744 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2745 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2746 /* 2747 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2748 rather than the slower MatSetValues(). 2749 */ 2750 M->was_assembled = PETSC_TRUE; 2751 M->assembled = PETSC_FALSE; 2752 } 2753 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2754 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2755 ii = aij->i; 2756 jj = aij->j; 2757 aa = aij->a; 2758 for (i=0; i<m; i++) { 2759 row = rstart + i; 2760 nz = ii[i+1] - ii[i]; 2761 cwork = jj; jj += nz; 2762 vwork = aa; aa += nz; 2763 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2764 } 2765 2766 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 *newmat = M; 2769 2770 /* save submatrix used in processor for next request */ 2771 if (call == MAT_INITIAL_MATRIX) { 2772 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2773 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2774 } 2775 2776 PetscFunctionReturn(0); 2777 } 2778 2779 EXTERN_C_BEGIN 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2783 { 2784 PetscInt m,cstart, cend,j,nnz,i,d; 2785 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2786 const PetscInt *JJ; 2787 PetscScalar *values; 2788 PetscErrorCode ierr; 2789 2790 PetscFunctionBegin; 2791 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2792 2793 B->rmap.bs = B->cmap.bs = 1; 2794 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2795 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2796 m = B->rmap.n; 2797 cstart = B->cmap.rstart; 2798 cend = B->cmap.rend; 2799 rstart = B->rmap.rstart; 2800 2801 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2802 o_nnz = d_nnz + m; 2803 2804 for (i=0; i<m; i++) { 2805 nnz = Ii[i+1]- Ii[i]; 2806 JJ = J + Ii[i]; 2807 nnz_max = PetscMax(nnz_max,nnz); 2808 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2809 for (j=0; j<nnz; j++) { 2810 if (*JJ >= cstart) break; 2811 JJ++; 2812 } 2813 d = 0; 2814 for (; j<nnz; j++) { 2815 if (*JJ++ >= cend) break; 2816 d++; 2817 } 2818 
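/* the columns of row i that fall in the diagonal block [cstart,cend) were counted
       into d above; the remaining nnz-d entries of the row belong to the off-diagonal block */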
d_nnz[i] = d; 2819 o_nnz[i] = nnz - d; 2820 } 2821 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2822 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2823 2824 if (v) values = (PetscScalar*)v; 2825 else { 2826 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2827 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2828 } 2829 2830 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2831 for (i=0; i<m; i++) { 2832 ii = i + rstart; 2833 nnz = Ii[i+1]- Ii[i]; 2834 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2835 } 2836 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2838 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2839 2840 if (!v) { 2841 ierr = PetscFree(values);CHKERRQ(ierr); 2842 } 2843 PetscFunctionReturn(0); 2844 } 2845 EXTERN_C_END 2846 2847 #undef __FUNCT__ 2848 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2849 /*@ 2850 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2851 (the default parallel PETSc format). 2852 2853 Collective on MPI_Comm 2854 2855 Input Parameters: 2856 + B - the matrix 2857 . i - the indices into j for the start of each local row (starts with zero) 2858 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2859 - v - optional values in the matrix 2860 2861 Level: developer 2862 2863 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2864 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2865 2866 .keywords: matrix, aij, compressed row, sparse, parallel 2867 2868 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2869 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2870 @*/ 2871 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2872 { 2873 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2874 2875 PetscFunctionBegin; 2876 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2877 if (f) { 2878 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2879 } 2880 PetscFunctionReturn(0); 2881 } 2882 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2885 /*@C 2886 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2887 (the default parallel PETSc format). For good matrix assembly performance 2888 the user should preallocate the matrix storage by setting the parameters 2889 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2890 performance can be increased by more than a factor of 50. 2891 2892 Collective on MPI_Comm 2893 2894 Input Parameters: 2895 + A - the matrix 2896 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2897 (same value is used for all local rows) 2898 . d_nnz - array containing the number of nonzeros in the various rows of the 2899 DIAGONAL portion of the local submatrix (possibly different for each row) 2900 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2901 The size of this array is equal to the number of local rows, i.e 'm'. 2902 You must leave room for the diagonal entry even if it is zero. 2903 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2904 submatrix (same value is used for all local rows). 2905 - o_nnz - array containing the number of nonzeros in the various rows of the 2906 OFF-DIAGONAL portion of the local submatrix (possibly different for 2907 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2908 structure. The size of this array is equal to the number 2909 of local rows, i.e. 'm'. 2910 2911 If the *_nnz parameter is given then the *_nz parameter is ignored. 2912 2913 The AIJ format (also called the Yale sparse matrix format or 2914 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2915 storage. The stored row and column indices begin with zero. See the users manual for details. 2916 2917 The parallel matrix is partitioned such that the first m0 rows belong to 2918 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2919 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2920 2921 The DIAGONAL portion of the local submatrix of a processor can be defined 2922 as the submatrix obtained by extracting the part corresponding 2923 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2924 first row that belongs to the processor, and r2 is the last row belonging 2925 to this processor. This is a square mxm matrix. The remaining portion 2926 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2927 2928 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2929 2930 Example usage: 2931 2932 Consider the following 8x8 matrix with 34 non-zero values, that is 2933 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2934 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2935 as follows: 2936 2937 .vb 2938 1 2 0 | 0 3 0 | 0 4 2939 Proc0 0 5 6 | 7 0 0 | 8 0 2940 9 0 10 | 11 0 0 | 12 0 2941 ------------------------------------- 2942 13 0 14 | 15 16 17 | 0 0 2943 Proc1 0 18 0 | 19 20 21 | 0 0 2944 0 0 0 | 22 23 0 | 24 0 2945 ------------------------------------- 2946 Proc2 25 26 27 | 0 0 28 | 29 0 2947 30 0 0 | 31 32 33 | 0 34 2948 .ve 2949 2950 This can be represented as a collection of submatrices as: 2951 2952 .vb 2953 A B C 2954 D E F 2955 G H I 2956 .ve 2957 2958 Here the submatrices A,B,C are owned by proc0, D,E,F are 2959 owned by proc1, and G,H,I are owned by proc2. 2960 2961 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2962 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2963 The 'M','N' parameters are 8,8, and have the same values on all procs. 2964 2965 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2966 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2967 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2968 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2969 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2970 matrix, and [DF] as another SeqAIJ matrix. 2971 2972 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2973 allocated for every row of the local diagonal submatrix, and o_nz 2974 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2975 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2976 local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 2977 In this case, the values of d_nz,o_nz are: 2978 .vb 2979 proc0 : d_nz = 2, o_nz = 2 2980 proc1 : d_nz = 3, o_nz = 2 2981 proc2 : d_nz = 1, o_nz = 4 2982 .ve 2983 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2984 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2985 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2986 34 values. 2987 2988 When the d_nnz, o_nnz parameters are specified, the storage is specified 2989 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2990 In the above case the values for d_nnz,o_nnz are: 2991 .vb 2992 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2993 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2994 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2995 .ve 2996 Here the space allocated is the sum of all the above values, i.e. 34, and 2997 hence the preallocation is exact. 2998 2999 Level: intermediate 3000 3001 .keywords: matrix, aij, compressed row, sparse, parallel 3002 3003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3004 MPIAIJ 3005 @*/ 3006 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3007 { 3008 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3009 3010 PetscFunctionBegin; 3011 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3012 if (f) { 3013 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3014 } 3015 PetscFunctionReturn(0); 3016 } 3017 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3020 /*@C 3021 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3022 in standard CSR format. 3023 3024 Collective on MPI_Comm 3025 3026 Input Parameters: 3027 + comm - MPI communicator 3028 . m - number of local rows (cannot be PETSC_DECIDE) 3029 . n - This value should be the same as the local size used in creating the 3030 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3031 it calculated if N is given) For square matrices n is almost always m. 3032 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3033 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3034 . i - row indices (i[0] must be 0, and i has m+1 entries) 3035 . j - column indices, sorted within each row 3036 - a - matrix values 3037 3038 Output Parameter: 3039 . mat - the matrix 3040 3041 Level: intermediate 3042 3043 Notes: 3044 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3045 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3046 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
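 As a small illustrative sketch (the numbers are made up for this note, not taken
       from any particular application), two processes could assemble the 4x4 matrix
.vb
              1 0 | 2 0              proc0:  m = 2, n = 2
              0 3 | 0 4                      i = {0,2,4}, j = {0,2,1,3}, a = {1,2,3,4}
              ---------
              5 0 | 6 0              proc1:  m = 2, n = 2
              0 7 | 0 8                      i = {0,2,4}, j = {0,2,1,3}, a = {5,6,7,8}
.ve
       with each process making the collective call
.vb
       MatCreateMPIAIJWithArrays(comm,2,2,4,4,i,j,a,&mat);
.ve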
3047 3048 The i and j indices are 0 based 3049 3050 .keywords: matrix, aij, compressed row, sparse, parallel 3051 3052 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3053 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3054 @*/ 3055 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3056 { 3057 PetscErrorCode ierr; 3058 3059 PetscFunctionBegin; 3060 if (i[0]) { 3061 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3062 } 3063 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3064 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3065 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3066 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatCreateMPIAIJ" 3072 /*@C 3073 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3074 (the default parallel PETSc format). For good matrix assembly performance 3075 the user should preallocate the matrix storage by setting the parameters 3076 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3077 performance can be increased by more than a factor of 50. 3078 3079 Collective on MPI_Comm 3080 3081 Input Parameters: 3082 + comm - MPI communicator 3083 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3084 This value should be the same as the local size used in creating the 3085 y vector for the matrix-vector product y = Ax. 3086 . n - This value should be the same as the local size used in creating the 3087 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3088 calculated if N is given) For square matrices n is almost always m. 3089 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3090 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3091 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3092 (same value is used for all local rows) 3093 . d_nnz - array containing the number of nonzeros in the various rows of the 3094 DIAGONAL portion of the local submatrix (possibly different for each row) 3095 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3096 The size of this array is equal to the number of local rows, i.e 'm'. 3097 You must leave room for the diagonal entry even if it is zero. 3098 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3099 submatrix (same value is used for all local rows). 3100 - o_nnz - array containing the number of nonzeros in the various rows of the 3101 OFF-DIAGONAL portion of the local submatrix (possibly different for 3102 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3103 structure. The size of this array is equal to the number 3104 of local rows, i.e 'm'. 3105 3106 Output Parameter: 3107 . A - the matrix 3108 3109 Notes: 3110 If the *_nnz parameter is given then the *_nz parameter is ignored 3111 3112 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3113 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3114 storage requirements for this matrix. 
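 For instance (an illustrative call, not tied to the example below), letting PETSc choose
   the row and column partitioning of an M by N matrix, with roughly 5 nonzeros per row
   expected in the diagonal block and 2 per row in the off-diagonal block, could look like
.vb
     MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve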
3115 3116 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3117 processor then it must be used on all processors that share the object for 3118 that argument. 3119 3120 The user MUST specify either the local or global matrix dimensions 3121 (possibly both). 3122 3123 The parallel matrix is partitioned such that the first m0 rows belong to 3124 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3125 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3126 3127 The DIAGONAL portion of the local submatrix of a processor can be defined 3128 as the submatrix which is obtained by extracting the part corresponding 3129 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3130 first row that belongs to the processor, and r2 is the last row belonging 3131 to this processor. This is a square mxm matrix. The remaining portion 3132 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3133 3134 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3135 3136 When calling this routine with a single process communicator, a matrix of 3137 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3138 type of communicator, use the construction mechanism: 3139 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3140 3141 By default, this format uses inodes (identical nodes) when possible. 3142 We search for consecutive rows with the same nonzero structure, thereby 3143 reusing matrix information to achieve increased efficiency. 3144 3145 Options Database Keys: 3146 + -mat_no_inode - Do not use inodes 3147 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3148 - -mat_aij_oneindex - Internally use indexing starting at 1 3149 rather than 0. Note that when calling MatSetValues(), 3150 the user still MUST index entries starting at 0! 3151 3152 3153 Example usage: 3154 3155 Consider the following 8x8 matrix with 34 non-zero values that is 3156 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3157 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 3158 as follows: 3159 3160 .vb 3161 1 2 0 | 0 3 0 | 0 4 3162 Proc0 0 5 6 | 7 0 0 | 8 0 3163 9 0 10 | 11 0 0 | 12 0 3164 ------------------------------------- 3165 13 0 14 | 15 16 17 | 0 0 3166 Proc1 0 18 0 | 19 20 21 | 0 0 3167 0 0 0 | 22 23 0 | 24 0 3168 ------------------------------------- 3169 Proc2 25 26 27 | 0 0 28 | 29 0 3170 30 0 0 | 31 32 33 | 0 34 3171 .ve 3172 3173 This can be represented as a collection of submatrices as: 3174 3175 .vb 3176 A B C 3177 D E F 3178 G H I 3179 .ve 3180 3181 where the submatrices A,B,C are owned by proc0, D,E,F are 3182 owned by proc1, and G,H,I are owned by proc2. 3183 3184 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3185 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3186 The 'M','N' parameters are 8,8, and have the same values on all procs. 3187 3188 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3189 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3190 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3191 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3192 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 3193 matrix, and [DF] as another SeqAIJ matrix.
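   For this 8x8 example, the call made on proc0 would look like the following sketch
   (the d_nnz/o_nnz values are the per-row counts worked out below):

.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve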
3194 3195 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3196 allocated for every row of the local diagonal submatrix, and o_nz 3197 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3198 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3199 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3200 In this case, the values of d_nz,o_nz are: 3201 .vb 3202 proc0 : d_nz = 2, o_nz = 2 3203 proc1 : d_nz = 3, o_nz = 2 3204 proc2 : d_nz = 1, o_nz = 4 3205 .ve 3206 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3207 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3208 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3209 34 values. 3210 3211 When the d_nnz, o_nnz parameters are specified, the storage is specified 3212 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3213 In the above case the values for d_nnz,o_nnz are: 3214 .vb 3215 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3216 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3217 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3218 .ve 3219 Here the space allocated is the sum of all the above values, i.e. 34, and 3220 hence the pre-allocation is perfect. 3221 3222 Level: intermediate 3223 3224 .keywords: matrix, aij, compressed row, sparse, parallel 3225 3226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3227 MPIAIJ, MatCreateMPIAIJWithArrays() 3228 @*/ 3229 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3230 { 3231 PetscErrorCode ierr; 3232 PetscMPIInt size; 3233 3234 PetscFunctionBegin; 3235 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3236 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3237 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3238 if (size > 1) { 3239 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3240 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3241 } else { 3242 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3243 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3244 } 3245 PetscFunctionReturn(0); 3246 } 3247 3248 #undef __FUNCT__ 3249 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3250 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3251 { 3252 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3253 3254 PetscFunctionBegin; 3255 *Ad = a->A; 3256 *Ao = a->B; 3257 *colmap = a->garray; 3258 PetscFunctionReturn(0); 3259 } 3260 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3263 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3264 { 3265 PetscErrorCode ierr; 3266 PetscInt i; 3267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3268 3269 PetscFunctionBegin; 3270 if (coloring->ctype == IS_COLORING_GLOBAL) { 3271 ISColoringValue *allcolors,*colors; 3272 ISColoring ocoloring; 3273 3274 /* set coloring for diagonal portion */ 3275 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3276 3277 /* set coloring for off-diagonal portion */ 3278 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3279 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3280 for (i=0; i<a->B->cmap.n; i++) { 3281 colors[i] = allcolors[a->garray[i]]; 3282 } 3283 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3284 ierr =
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3285 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3286 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3287 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3288 ISColoringValue *colors; 3289 PetscInt *larray; 3290 ISColoring ocoloring; 3291 3292 /* set coloring for diagonal portion */ 3293 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3294 for (i=0; i<a->A->cmap.n; i++) { 3295 larray[i] = i + A->cmap.rstart; 3296 } 3297 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3298 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3299 for (i=0; i<a->A->cmap.n; i++) { 3300 colors[i] = coloring->colors[larray[i]]; 3301 } 3302 ierr = PetscFree(larray);CHKERRQ(ierr); 3303 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3304 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3305 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3306 3307 /* set coloring for off-diagonal portion */ 3308 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3309 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3310 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3311 for (i=0; i<a->B->cmap.n; i++) { 3312 colors[i] = coloring->colors[larray[i]]; 3313 } 3314 ierr = PetscFree(larray);CHKERRQ(ierr); 3315 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3316 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3317 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3318 } else { 3319 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3320 } 3321 3322 PetscFunctionReturn(0); 3323 } 3324 3325 #if defined(PETSC_HAVE_ADIC) 3326 #undef __FUNCT__ 3327 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3328 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3329 { 3330 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3331 PetscErrorCode ierr; 3332 3333 PetscFunctionBegin; 3334 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3335 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3336 PetscFunctionReturn(0); 3337 } 3338 #endif 3339 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3342 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3343 { 3344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3345 PetscErrorCode ierr; 3346 3347 PetscFunctionBegin; 3348 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3349 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatMerge" 3355 /*@C 3356 MatMerge - Creates a single large PETSc matrix by concatenating the sequential 3357 matrices from each processor 3358 3359 Collective on MPI_Comm 3360 3361 Input Parameters: 3362 + comm - the communicator the parallel matrix will live on 3363 . inmat - the input sequential matrix (one per process) 3364 . n - number of local columns (or PETSC_DECIDE) 3365 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3366 3367 Output Parameter: 3368 . outmat - the parallel matrix generated 3369 3370 Level: advanced 3371 3372 Notes: The number of columns of the matrix in EACH processor MUST be the same.
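   A usage sketch (it is assumed each process has already assembled its own SeqAIJ
   matrix Aseq, with the same number of columns on every process):

.vb
   Mat Apar;
   ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
   /* Aseq is destroyed inside MatMerge(); only Apar remains */
.ve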
3373 3374 @*/ 3375 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3376 { 3377 PetscErrorCode ierr; 3378 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3379 PetscInt *indx; 3380 PetscScalar *values; 3381 3382 PetscFunctionBegin; 3383 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3384 if (scall == MAT_INITIAL_MATRIX){ 3385 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3386 if (n == PETSC_DECIDE){ 3387 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3388 } 3389 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3390 rstart -= m; 3391 3392 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3393 for (i=0;i<m;i++) { 3394 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3395 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3396 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3397 } 3398 /* This routine will ONLY return MPIAIJ type matrix */ 3399 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3400 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3401 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3402 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3403 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3404 3405 } else if (scall == MAT_REUSE_MATRIX){ 3406 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3407 } else { 3408 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3409 } 3410 3411 for (i=0;i<m;i++) { 3412 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3413 Ii = i + rstart; 3414 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3415 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3416 } 3417 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3418 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3419 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3420 3421 PetscFunctionReturn(0); 3422 } 3423 3424 #undef __FUNCT__ 3425 #define __FUNCT__ "MatFileSplit" 3426 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3427 { 3428 PetscErrorCode ierr; 3429 PetscMPIInt rank; 3430 PetscInt m,N,i,rstart,nnz; 3431 size_t len; 3432 const PetscInt *indx; 3433 PetscViewer out; 3434 char *name; 3435 Mat B; 3436 const PetscScalar *values; 3437 3438 PetscFunctionBegin; 3439 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3440 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3441 /* Should this be the type of the diagonal block of A? 
*/ 3442 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3443 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3444 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3445 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3446 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3447 for (i=0;i<m;i++) { 3448 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3449 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3450 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3451 } 3452 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3453 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3454 3455 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3456 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3457 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3458 sprintf(name,"%s.%d",outfile,rank); 3459 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3460 ierr = PetscFree(name); 3461 ierr = MatView(B,out);CHKERRQ(ierr); 3462 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3463 ierr = MatDestroy(B);CHKERRQ(ierr); 3464 PetscFunctionReturn(0); 3465 } 3466 3467 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3468 #undef __FUNCT__ 3469 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3470 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3471 { 3472 PetscErrorCode ierr; 3473 Mat_Merge_SeqsToMPI *merge; 3474 PetscContainer container; 3475 3476 PetscFunctionBegin; 3477 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3478 if (container) { 3479 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3480 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3481 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3482 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3483 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3484 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3485 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3486 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3487 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3488 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3489 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3490 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3491 3492 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3493 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3494 } 3495 ierr = PetscFree(merge);CHKERRQ(ierr); 3496 3497 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3498 PetscFunctionReturn(0); 3499 } 3500 3501 #include "src/mat/utils/freespace.h" 3502 #include "petscbt.h" 3503 static PetscEvent logkey_seqstompinum = 0; 3504 #undef __FUNCT__ 3505 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3506 /*@C 3507 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3508 matrices from each processor 3509 3510 Collective on MPI_Comm 3511 3512 Input Parameters: 3513 + comm - the communicators the parallel matrix will live on 3514 . seqmat - the input sequential matrices 3515 . m - number of local rows (or PETSC_DECIDE) 3516 . n - number of local columns (or PETSC_DECIDE) 3517 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3518 3519 Output Parameter: 3520 . mpimat - the parallel matrix generated 3521 3522 Level: advanced 3523 3524 Notes: 3525 The dimensions of the sequential matrix in each processor MUST be the same. 3526 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3527 destroyed when mpimat is destroyed. 
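   A usage sketch (seqmat is assumed to be the identically sized SeqAIJ matrix built on each process):

.vb
   Mat mpimat;
   ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve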
Call PetscObjectQuery() to access seqmat. 3528 @*/ 3529 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3530 { 3531 PetscErrorCode ierr; 3532 MPI_Comm comm=mpimat->comm; 3533 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3534 PetscMPIInt size,rank,taga,*len_s; 3535 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3536 PetscInt proc,m; 3537 PetscInt **buf_ri,**buf_rj; 3538 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3539 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3540 MPI_Request *s_waits,*r_waits; 3541 MPI_Status *status; 3542 MatScalar *aa=a->a,**abuf_r,*ba_i; 3543 Mat_Merge_SeqsToMPI *merge; 3544 PetscContainer container; 3545 3546 PetscFunctionBegin; 3547 if (!logkey_seqstompinum) { 3548 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3549 } 3550 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3551 3552 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3553 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3554 3555 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3556 if (container) { 3557 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3558 } 3559 bi = merge->bi; 3560 bj = merge->bj; 3561 buf_ri = merge->buf_ri; 3562 buf_rj = merge->buf_rj; 3563 3564 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3565 owners = merge->rowmap.range; 3566 len_s = merge->len_s; 3567 3568 /* send and recv matrix values */ 3569 /*-----------------------------*/ 3570 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3571 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3572 3573 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3574 for (proc=0,k=0; proc<size; proc++){ 3575 if (!len_s[proc]) continue; 3576 i = owners[proc]; 3577 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3578 k++; 3579 } 3580 3581 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3582 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3583 ierr = PetscFree(status);CHKERRQ(ierr); 3584 3585 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3586 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3587 3588 /* insert mat values of mpimat */ 3589 /*----------------------------*/ 3590 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3591 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3592 nextrow = buf_ri_k + merge->nrecv; 3593 nextai = nextrow + merge->nrecv; 3594 3595 for (k=0; k<merge->nrecv; k++){ 3596 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3597 nrows = *(buf_ri_k[k]); 3598 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3599 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3600 } 3601 3602 /* set values of ba */ 3603 m = merge->rowmap.n; 3604 for (i=0; i<m; i++) { 3605 arow = owners[rank] + i; 3606 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3607 bnzi = bi[i+1] - bi[i]; 3608 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3609 3610 /* add local non-zero vals of this proc's seqmat into ba */ 3611 anzi = ai[arow+1] - ai[arow]; 3612 aj = a->j + ai[arow]; 3613 aa = a->a + ai[arow]; 3614 nextaj = 0; 3615 for (j=0; nextaj<anzi; j++){ 3616 if 
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3617 ba_i[j] += aa[nextaj++]; 3618 } 3619 } 3620 3621 /* add received vals into ba */ 3622 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3623 /* i-th row */ 3624 if (i == *nextrow[k]) { 3625 anzi = *(nextai[k]+1) - *nextai[k]; 3626 aj = buf_rj[k] + *(nextai[k]); 3627 aa = abuf_r[k] + *(nextai[k]); 3628 nextaj = 0; 3629 for (j=0; nextaj<anzi; j++){ 3630 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3631 ba_i[j] += aa[nextaj++]; 3632 } 3633 } 3634 nextrow[k]++; nextai[k]++; 3635 } 3636 } 3637 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3638 } 3639 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3640 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3641 3642 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3643 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3644 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3645 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3646 PetscFunctionReturn(0); 3647 } 3648 3649 static PetscEvent logkey_seqstompisym = 0; 3650 #undef __FUNCT__ 3651 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3652 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3653 { 3654 PetscErrorCode ierr; 3655 Mat B_mpi; 3656 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3657 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3658 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3659 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3660 PetscInt len,proc,*dnz,*onz; 3661 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3662 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3663 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3664 MPI_Status *status; 3665 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3666 PetscBT lnkbt; 3667 Mat_Merge_SeqsToMPI *merge; 3668 PetscContainer container; 3669 3670 PetscFunctionBegin; 3671 if (!logkey_seqstompisym) { 3672 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3673 } 3674 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3675 3676 /* make sure it is a PETSc comm */ 3677 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3678 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3679 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3680 3681 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3682 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3683 3684 /* determine row ownership */ 3685 /*---------------------------------------------------------*/ 3686 merge->rowmap.n = m; 3687 merge->rowmap.N = M; 3688 merge->rowmap.bs = 1; 3689 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3690 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3691 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3692 3693 m = merge->rowmap.n; 3694 M = merge->rowmap.N; 3695 owners = merge->rowmap.range; 3696 3697 /* determine the number of messages to send, their lengths */ 3698 /*---------------------------------------------------------*/ 3699 len_s = merge->len_s; 3700 3701 len = 0; /* length of buf_si[] */ 3702 merge->nsend = 0; 3703 for (proc=0; proc<size; proc++){ 3704 len_si[proc] = 0; 3705 if (proc == rank){ 3706 len_s[proc] = 0; 3707 } else { 3708 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3709 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows 
to be sent to [proc] */ 3710 } 3711 if (len_s[proc]) { 3712 merge->nsend++; 3713 nrows = 0; 3714 for (i=owners[proc]; i<owners[proc+1]; i++){ 3715 if (ai[i+1] > ai[i]) nrows++; 3716 } 3717 len_si[proc] = 2*(nrows+1); 3718 len += len_si[proc]; 3719 } 3720 } 3721 3722 /* determine the number and length of messages to receive for ij-structure */ 3723 /*-------------------------------------------------------------------------*/ 3724 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3725 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3726 3727 /* post the Irecv of j-structure */ 3728 /*-------------------------------*/ 3729 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3730 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3731 3732 /* post the Isend of j-structure */ 3733 /*--------------------------------*/ 3734 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3735 sj_waits = si_waits + merge->nsend; 3736 3737 for (proc=0, k=0; proc<size; proc++){ 3738 if (!len_s[proc]) continue; 3739 i = owners[proc]; 3740 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3741 k++; 3742 } 3743 3744 /* receives and sends of j-structure are complete */ 3745 /*------------------------------------------------*/ 3746 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3747 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3748 3749 /* send and recv i-structure */ 3750 /*---------------------------*/ 3751 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3752 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3753 3754 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3755 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3756 for (proc=0,k=0; proc<size; proc++){ 3757 if (!len_s[proc]) continue; 3758 /* form outgoing message for i-structure: 3759 buf_si[0]: nrows to be sent 3760 [1:nrows]: row index (global) 3761 [nrows+1:2*nrows+1]: i-structure index 3762 */ 3763 /*-------------------------------------------*/ 3764 nrows = len_si[proc]/2 - 1; 3765 buf_si_i = buf_si + nrows+1; 3766 buf_si[0] = nrows; 3767 buf_si_i[0] = 0; 3768 nrows = 0; 3769 for (i=owners[proc]; i<owners[proc+1]; i++){ 3770 anzi = ai[i+1] - ai[i]; 3771 if (anzi) { 3772 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3773 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3774 nrows++; 3775 } 3776 } 3777 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3778 k++; 3779 buf_si += len_si[proc]; 3780 } 3781 3782 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3783 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3784 3785 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3786 for (i=0; i<merge->nrecv; i++){ 3787 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3788 } 3789 3790 ierr = PetscFree(len_si);CHKERRQ(ierr); 3791 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3792 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3793 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3794 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3795 ierr 
= PetscFree(buf_s);CHKERRQ(ierr); 3796 ierr = PetscFree(status);CHKERRQ(ierr); 3797 3798 /* compute a local seq matrix in each processor */ 3799 /*----------------------------------------------*/ 3800 /* allocate bi array and free space for accumulating nonzero column info */ 3801 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3802 bi[0] = 0; 3803 3804 /* create and initialize a linked list */ 3805 nlnk = N+1; 3806 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3807 3808 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3809 len = 0; 3810 len = ai[owners[rank+1]] - ai[owners[rank]]; 3811 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3812 current_space = free_space; 3813 3814 /* determine symbolic info for each local row */ 3815 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3816 nextrow = buf_ri_k + merge->nrecv; 3817 nextai = nextrow + merge->nrecv; 3818 for (k=0; k<merge->nrecv; k++){ 3819 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3820 nrows = *buf_ri_k[k]; 3821 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3822 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3823 } 3824 3825 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3826 len = 0; 3827 for (i=0;i<m;i++) { 3828 bnzi = 0; 3829 /* add local non-zero cols of this proc's seqmat into lnk */ 3830 arow = owners[rank] + i; 3831 anzi = ai[arow+1] - ai[arow]; 3832 aj = a->j + ai[arow]; 3833 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3834 bnzi += nlnk; 3835 /* add received col data into lnk */ 3836 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3837 if (i == *nextrow[k]) { /* i-th row */ 3838 anzi = *(nextai[k]+1) - *nextai[k]; 3839 aj = buf_rj[k] + *nextai[k]; 3840 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3841 bnzi += nlnk; 3842 nextrow[k]++; nextai[k]++; 3843 } 3844 } 3845 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3846 3847 /* if free space is not available, make more free space */ 3848 if (current_space->local_remaining<bnzi) { 3849 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3850 nspacedouble++; 3851 } 3852 /* copy data into free space, then initialize lnk */ 3853 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3854 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3855 3856 current_space->array += bnzi; 3857 current_space->local_used += bnzi; 3858 current_space->local_remaining -= bnzi; 3859 3860 bi[i+1] = bi[i] + bnzi; 3861 } 3862 3863 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3864 3865 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3866 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3867 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3868 3869 /* create symbolic parallel matrix B_mpi */ 3870 /*---------------------------------------*/ 3871 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3872 if (n==PETSC_DECIDE) { 3873 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3874 } else { 3875 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3876 } 3877 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3878 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3879 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3880 3881 /* B_mpi is not ready for use - assembly will be done 
by MatMerge_SeqsToMPINumeric() */ 3882 B_mpi->assembled = PETSC_FALSE; 3883 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3884 merge->bi = bi; 3885 merge->bj = bj; 3886 merge->buf_ri = buf_ri; 3887 merge->buf_rj = buf_rj; 3888 merge->coi = PETSC_NULL; 3889 merge->coj = PETSC_NULL; 3890 merge->owners_co = PETSC_NULL; 3891 3892 /* attach the supporting struct to B_mpi for reuse */ 3893 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3894 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3895 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3896 *mpimat = B_mpi; 3897 3898 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3899 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3900 PetscFunctionReturn(0); 3901 } 3902 3903 static PetscEvent logkey_seqstompi = 0; 3904 #undef __FUNCT__ 3905 #define __FUNCT__ "MatMerge_SeqsToMPI" 3906 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3907 { 3908 PetscErrorCode ierr; 3909 3910 PetscFunctionBegin; 3911 if (!logkey_seqstompi) { 3912 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3913 } 3914 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3915 if (scall == MAT_INITIAL_MATRIX){ 3916 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3917 } 3918 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3919 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3920 PetscFunctionReturn(0); 3921 } 3922 static PetscEvent logkey_getlocalmat = 0; 3923 #undef __FUNCT__ 3924 #define __FUNCT__ "MatGetLocalMat" 3925 /*@C 3926 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3927 3928 Not Collective 3929 3930 Input Parameters: 3931 + A - the matrix 3932 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3933 3934 Output Parameter: 3935 . 
A_loc - the local sequential matrix generated 3936 3937 Level: developer 3938 3939 @*/ 3940 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3941 { 3942 PetscErrorCode ierr; 3943 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3944 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3945 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3946 PetscScalar *aa=a->a,*ba=b->a,*ca; 3947 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3948 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3949 3950 PetscFunctionBegin; 3951 if (!logkey_getlocalmat) { 3952 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3953 } 3954 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3955 if (scall == MAT_INITIAL_MATRIX){ 3956 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3957 ci[0] = 0; 3958 for (i=0; i<am; i++){ 3959 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3960 } 3961 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3962 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3963 k = 0; 3964 for (i=0; i<am; i++) { 3965 ncols_o = bi[i+1] - bi[i]; 3966 ncols_d = ai[i+1] - ai[i]; 3967 /* off-diagonal portion of A */ 3968 for (jo=0; jo<ncols_o; jo++) { 3969 col = cmap[*bj]; 3970 if (col >= cstart) break; 3971 cj[k] = col; bj++; 3972 ca[k++] = *ba++; 3973 } 3974 /* diagonal portion of A */ 3975 for (j=0; j<ncols_d; j++) { 3976 cj[k] = cstart + *aj++; 3977 ca[k++] = *aa++; 3978 } 3979 /* off-diagonal portion of A */ 3980 for (j=jo; j<ncols_o; j++) { 3981 cj[k] = cmap[*bj++]; 3982 ca[k++] = *ba++; 3983 } 3984 } 3985 /* put together the new matrix */ 3986 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3987 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3988 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3989 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3990 mat->free_a = PETSC_TRUE; 3991 mat->free_ij = PETSC_TRUE; 3992 mat->nonew = 0; 3993 } else if (scall == MAT_REUSE_MATRIX){ 3994 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3995 ci = mat->i; cj = mat->j; ca = mat->a; 3996 for (i=0; i<am; i++) { 3997 /* off-diagonal portion of A */ 3998 ncols_o = bi[i+1] - bi[i]; 3999 for (jo=0; jo<ncols_o; jo++) { 4000 col = cmap[*bj]; 4001 if (col >= cstart) break; 4002 *ca++ = *ba++; bj++; 4003 } 4004 /* diagonal portion of A */ 4005 ncols_d = ai[i+1] - ai[i]; 4006 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4007 /* off-diagonal portion of A */ 4008 for (j=jo; j<ncols_o; j++) { 4009 *ca++ = *ba++; bj++; 4010 } 4011 } 4012 } else { 4013 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4014 } 4015 4016 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4017 PetscFunctionReturn(0); 4018 } 4019 4020 static PetscEvent logkey_getlocalmatcondensed = 0; 4021 #undef __FUNCT__ 4022 #define __FUNCT__ "MatGetLocalMatCondensed" 4023 /*@C 4024 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4025 4026 Not Collective 4027 4028 Input Parameters: 4029 + A - the matrix 4030 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4031 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4032 4033 Output Parameter: 4034 . 
A_loc - the local sequential matrix generated 4035 4036 Level: developer 4037 4038 @*/ 4039 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4040 { 4041 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4042 PetscErrorCode ierr; 4043 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4044 IS isrowa,iscola; 4045 Mat *aloc; 4046 4047 PetscFunctionBegin; 4048 if (!logkey_getlocalmatcondensed) { 4049 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4050 } 4051 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4052 if (!row){ 4053 start = A->rmap.rstart; end = A->rmap.rend; 4054 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4055 } else { 4056 isrowa = *row; 4057 } 4058 if (!col){ 4059 start = A->cmap.rstart; 4060 cmap = a->garray; 4061 nzA = a->A->cmap.n; 4062 nzB = a->B->cmap.n; 4063 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4064 ncols = 0; 4065 for (i=0; i<nzB; i++) { 4066 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4067 else break; 4068 } 4069 imark = i; 4070 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4071 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4072 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4073 ierr = PetscFree(idx);CHKERRQ(ierr); 4074 } else { 4075 iscola = *col; 4076 } 4077 if (scall != MAT_INITIAL_MATRIX){ 4078 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4079 aloc[0] = *A_loc; 4080 } 4081 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4082 *A_loc = aloc[0]; 4083 ierr = PetscFree(aloc);CHKERRQ(ierr); 4084 if (!row){ 4085 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4086 } 4087 if (!col){ 4088 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4089 } 4090 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4091 PetscFunctionReturn(0); 4092 } 4093 4094 static PetscEvent logkey_GetBrowsOfAcols = 0; 4095 #undef __FUNCT__ 4096 #define __FUNCT__ "MatGetBrowsOfAcols" 4097 /*@C 4098 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4099 4100 Collective on Mat 4101 4102 Input Parameters: 4103 + A,B - the matrices in mpiaij format 4104 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4105 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4106 4107 Output Parameter: 4108 + rowb, colb - index sets of rows and columns of B to extract 4109 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4110 - B_seq - the sequential matrix generated 4111 4112 Level: developer 4113 4114 @*/ 4115 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4116 { 4117 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4118 PetscErrorCode ierr; 4119 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4120 IS isrowb,iscolb; 4121 Mat *bseq; 4122 4123 PetscFunctionBegin; 4124 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4125 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4126 } 4127 if (!logkey_GetBrowsOfAcols) { 4128 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4129 } 4130 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4131 4132 if (scall == MAT_INITIAL_MATRIX){ 4133 start = A->cmap.rstart; 4134 cmap = a->garray; 4135 nzA = a->A->cmap.n; 4136 nzB = a->B->cmap.n; 4137 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4138 ncols = 0; 4139 for (i=0; i<nzB; i++) { /* row < local row index */ 4140 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4141 else break; 4142 } 4143 imark = i; 4144 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4145 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4146 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4147 ierr = PetscFree(idx);CHKERRQ(ierr); 4148 *brstart = imark; 4149 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4150 } else { 4151 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4152 isrowb = *rowb; iscolb = *colb; 4153 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4154 bseq[0] = *B_seq; 4155 } 4156 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4157 *B_seq = bseq[0]; 4158 ierr = PetscFree(bseq);CHKERRQ(ierr); 4159 if (!rowb){ 4160 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4161 } else { 4162 *rowb = isrowb; 4163 } 4164 if (!colb){ 4165 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4166 } else { 4167 *colb = iscolb; 4168 } 4169 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4170 PetscFunctionReturn(0); 4171 } 4172 4173 static PetscEvent logkey_GetBrowsOfAocols = 0; 4174 #undef __FUNCT__ 4175 #define __FUNCT__ "MatGetBrowsOfAoCols" 4176 /*@C 4177 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4178 of the OFF-DIAGONAL portion of local A 4179 4180 Collective on Mat 4181 4182 Input Parameters: 4183 + A,B - the matrices in mpiaij format 4184 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4185 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4186 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4187 4188 Output Parameter: 4189 + B_oth - the sequential matrix generated 4190 4191 Level: developer 4192 4193 @*/ 4194 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4195 { 4196 VecScatter_MPI_General *gen_to,*gen_from; 4197 PetscErrorCode ierr; 4198 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4199 Mat_SeqAIJ *b_oth; 4200 VecScatter ctx=a->Mvctx; 4201 MPI_Comm comm=ctx->comm; 4202 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4203 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4204 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4205 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4206 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4207 MPI_Status *sstatus,rstatus; 4208 PetscInt *cols,sbs,rbs; 4209 PetscScalar *vals; 4210 4211 PetscFunctionBegin; 4212 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4213 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4214 } 4215 if (!logkey_GetBrowsOfAocols) { 4216 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4217 } 4218 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4219 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4220 4221 gen_to = (VecScatter_MPI_General*)ctx->todata; 4222 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4223 rvalues = gen_from->values; /* holds the length of receiving row */ 4224 svalues = gen_to->values; /* holds the length of sending row */ 4225 nrecvs = gen_from->n; 4226 nsends = gen_to->n; 4227 4228 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4229 srow = gen_to->indices; /* local row index to be sent */ 4230 sstarts = gen_to->starts; 4231 sprocs = gen_to->procs; 4232 sstatus = gen_to->sstatus; 4233 sbs = gen_to->bs; 4234 rstarts = gen_from->starts; 4235 rprocs = gen_from->procs; 4236 rbs = gen_from->bs; 4237 4238 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4239 if (scall == MAT_INITIAL_MATRIX){ 4240 /* i-array */ 4241 /*---------*/ 4242 /* post receives */ 4243 for (i=0; i<nrecvs; i++){ 4244 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4245 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4246 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4247 } 4248 4249 /* pack the outgoing message */ 4250 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4251 rstartsj = sstartsj + nsends +1; 4252 sstartsj[0] = 0; rstartsj[0] = 0; 4253 len = 0; /* total length of j or a array to be sent */ 4254 k = 0; 4255 for (i=0; i<nsends; i++){ 4256 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4257 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4258 for (j=0; j<nrows; j++) { 4259 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4260 for (l=0; l<sbs; l++){ 4261 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4262 rowlen[j*sbs+l] = ncols; 4263 len += ncols; 4264 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4265 } 4266 k++; 4267 } 4268 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4269 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4270 } 4271 /* recvs and sends of i-array are completed */ 4272 i = nrecvs; 4273 while (i--) { 4274 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4275 } 4276 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4277 4278 /* allocate buffers for sending j and a arrays */ 4279 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4280 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4281 4282 /* create i-array of B_oth */ 4283 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4284 b_othi[0] = 0; 4285 len = 0; /* total length of j or a array to be received */ 4286 k = 0; 4287 for (i=0; i<nrecvs; i++){ 4288 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4289 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4290 for (j=0; j<nrows; j++) { 4291 b_othi[k+1] = b_othi[k] + rowlen[j]; 4292 len += rowlen[j]; k++; 4293 } 4294 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4295 } 4296 4297 /* allocate space for j and a arrrays of B_oth */ 4298 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4299 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4300 4301 /* j-array */ 4302 /*---------*/ 4303 /* post receives of j-array */ 4304 for (i=0; i<nrecvs; i++){ 4305 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4306 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4307 } 4308 4309 /* pack the outgoing message j-array */ 4310 k = 0; 4311 for (i=0; i<nsends; i++){ 4312 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4313 bufJ = bufj+sstartsj[i]; 4314 for (j=0; j<nrows; j++) { 4315 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4316 for (ll=0; ll<sbs; ll++){ 4317 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4318 for (l=0; l<ncols; l++){ 4319 *bufJ++ = cols[l]; 4320 } 4321 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4322 } 4323 } 4324 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4325 } 4326 4327 /* recvs and sends of j-array are completed */ 4328 i = nrecvs; 4329 while (i--) { 4330 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4331 } 4332 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4333 } else if (scall == MAT_REUSE_MATRIX){ 4334 sstartsj = *startsj; 4335 rstartsj = sstartsj + nsends +1; 4336 bufa = *bufa_ptr; 4337 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4338 b_otha = b_oth->a; 4339 } else { 4340 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4341 } 4342 4343 /* a-array */ 4344 /*---------*/ 4345 /* post receives of a-array */ 4346 for (i=0; i<nrecvs; i++){ 4347 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4348 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4349 } 4350 4351 /* pack the outgoing message a-array */ 4352 k = 0; 4353 for (i=0; i<nsends; i++){ 4354 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4355 bufA = bufa+sstartsj[i]; 4356 for (j=0; j<nrows; j++) { 4357 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4358 for (ll=0; ll<sbs; ll++){ 4359 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4360 for (l=0; l<ncols; l++){ 4361 *bufA++ = vals[l]; 4362 } 4363 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4364 } 4365 } 4366 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4367 } 4368 /* recvs and sends of a-array are completed */ 4369 i = nrecvs; 4370 while (i--) { 4371 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4372 } 4373 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4374 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4375 4376 if (scall == MAT_INITIAL_MATRIX){ 4377 /* put together the new matrix */ 4378 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4379 4380 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4381 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4382 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4383 b_oth->free_a = PETSC_TRUE; 4384 b_oth->free_ij = PETSC_TRUE; 4385 b_oth->nonew = 0; 4386 4387 ierr = PetscFree(bufj);CHKERRQ(ierr); 4388 if (!startsj || !bufa_ptr){ 4389 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4390 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4391 } else { 4392 *startsj = sstartsj; 4393 *bufa_ptr = bufa; 4394 } 4395 } 4396 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4397 PetscFunctionReturn(0); 4398 } 4399 4400 #undef __FUNCT__ 4401 #define __FUNCT__ "MatGetCommunicationStructs" 4402 /*@C 4403 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4404 4405 Not Collective 4406 4407 Input Parameters: 4408 . A - The matrix in mpiaij format 4409 4410 Output Parameter: 4411 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4412 . colmap - A map from global column index to local index into lvec 4413 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4414 4415 Level: developer 4416 4417 @*/ 4418 #if defined (PETSC_USE_CTABLE) 4419 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4420 #else 4421 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4422 #endif 4423 { 4424 Mat_MPIAIJ *a; 4425 4426 PetscFunctionBegin; 4427 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4428 PetscValidPointer(lvec, 2) 4429 PetscValidPointer(colmap, 3) 4430 PetscValidPointer(multScatter, 4) 4431 a = (Mat_MPIAIJ *) A->data; 4432 if (lvec) *lvec = a->lvec; 4433 if (colmap) *colmap = a->colmap; 4434 if (multScatter) *multScatter = a->Mvctx; 4435 PetscFunctionReturn(0); 4436 } 4437 4438 EXTERN_C_BEGIN 4439 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4440 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4441 EXTERN_C_END 4442 4443 /*MC 4444 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4445 4446 Options Database Keys: 4447 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4448 4449 Level: beginner 4450 4451 .seealso: MatCreateMPIAIJ 4452 M*/ 4453 4454 EXTERN_C_BEGIN 4455 #undef __FUNCT__ 4456 #define __FUNCT__ "MatCreate_MPIAIJ" 4457 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4458 { 4459 Mat_MPIAIJ *b; 4460 PetscErrorCode ierr; 4461 PetscMPIInt size; 4462 4463 PetscFunctionBegin; 4464 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4465 4466 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4467 B->data = (void*)b; 4468 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4469 B->factor = 0; 4470 B->rmap.bs = 1; 4471 B->assembled = PETSC_FALSE; 4472 B->mapping = 0; 4473 4474 B->insertmode = NOT_SET_VALUES; 4475 b->size = size; 4476 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4477 4478 /* build cache for off array entries formed */ 4479 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4480 b->donotstash = PETSC_FALSE; 4481 b->colmap = 0; 4482 b->garray = 0; 4483 b->roworiented = PETSC_TRUE; 4484 4485 /* stuff used for matrix vector multiply */ 4486 b->lvec = PETSC_NULL; 4487 b->Mvctx = PETSC_NULL; 4488 4489 /* stuff for MatGetRow() */ 4490 b->rowindices = 0; 4491 b->rowvalues = 0; 4492 b->getrowactive = PETSC_FALSE; 4493 4494 4495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4496 "MatStoreValues_MPIAIJ", 4497 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4499 "MatRetrieveValues_MPIAIJ", 4500 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4502 "MatGetDiagonalBlock_MPIAIJ", 4503 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4505 "MatIsTranspose_MPIAIJ", 4506 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4508 "MatMPIAIJSetPreallocation_MPIAIJ", 4509 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4511 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4512 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4514 "MatDiagonalScaleLocal_MPIAIJ", 4515 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4516 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4517 "MatConvert_MPIAIJ_MPICSRPERM", 4518 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4520 "MatConvert_MPIAIJ_MPICRL", 4521 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4522 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4523 PetscFunctionReturn(0); 4524 } 4525 EXTERN_C_END 4526 4527 #undef __FUNCT__ 4528 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4529 /*@C 4530 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4531 and "off-diagonal" part of the matrix in CSR format. 4532 4533 Collective on MPI_Comm 4534 4535 Input Parameters: 4536 + comm - MPI communicator 4537 . m - number of local rows (Cannot be PETSC_DECIDE) 4538 . n - This value should be the same as the local size used in creating the 4539 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4540 calculated if N is given) For square matrices n is almost always m. 4541 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4542 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4543 . i - row indices for "diagonal" portion of matrix 4544 . j - column indices 4545 . a - matrix values 4546 . oi - row indices for "off-diagonal" portion of matrix 4547 . oj - column indices 4548 - oa - matrix values 4549 4550 Output Parameter: 4551 . mat - the matrix 4552 4553 Level: advanced 4554 4555 Notes: 4556 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4557 4558 The i and j indices are 0 based 4559 4560 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4561 4562 4563 .keywords: matrix, aij, compressed row, sparse, parallel 4564 4565 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4566 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4567 @*/ 4568 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4569 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4570 { 4571 PetscErrorCode ierr; 4572 Mat_MPIAIJ *maij; 4573 4574 PetscFunctionBegin; 4575 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4576 if (i[0]) { 4577 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4578 } 4579 if (oi[0]) { 4580 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4581 } 4582 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4583 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4584 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4585 maij = (Mat_MPIAIJ*) (*mat)->data; 4586 maij->donotstash = PETSC_TRUE; 4587 (*mat)->preallocated = PETSC_TRUE; 4588 4589 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4590 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4591 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4592 4593 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4594 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4595 4596 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4598 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4600 4601 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4603 PetscFunctionReturn(0); 4604 } 4605 4606 /* 4607 Special version for direct calls from Fortran 4608 */ 4609 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4610 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4611 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4612 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4613 #endif 4614 4615 /* Change these macros so can be used in void function */ 4616 #undef CHKERRQ 4617 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4618 #undef SETERRQ2 4619 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4620 #undef SETERRQ 4621 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4622 4623 EXTERN_C_BEGIN 4624 #undef __FUNCT__ 4625 #define __FUNCT__ "matsetvaluesmpiaij_" 4626 void PETSC_STDCALL 
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END