#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash-table cost; without it, it is not scalable (each processor
   has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = 
aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt
nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero 
the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = 
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = 
mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_INFO) { 905 PetscInt inodecount,inodelimit,*inodes; 906 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 907 if (inodes) { 908 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 909 } else { 910 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 942 if (!rank) { 943 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 944 } else { 945 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 946 } 947 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 948 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 949 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 950 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 951 952 /* copy over the A part */ 953 Aloc = (Mat_SeqAIJ*)aij->A->data; 954 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = mat->rmap.rstart; 956 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 957 for (i=0; i<m; i++) { 958 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 959 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 960 } 961 aj = Aloc->j; 962 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 963 964 /* copy over the B part */ 965 Aloc = (Mat_SeqAIJ*)aij->B->data; 966 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = mat->rmap.rstart; 968 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 969 ct = cols; 970 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 974 } 975 ierr = PetscFree(ct);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the PetscDraw object 981 */ 982 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 if (!rank) { 984 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 985 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 986 } 987 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 988 ierr = MatDestroy(A);CHKERRQ(ierr); 989 } 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatView_MPIAIJ" 995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 996 { 997 PetscErrorCode ierr; 998 PetscTruth iascii,isdraw,issocket,isbinary; 999 1000 PetscFunctionBegin; 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
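  /*
     Illustrative sketch (added note, not part of the original file): the viewer dispatch
     below is normally reached through the public MatView() interface. Assuming an
     assembled matrix "mat" of type MATMPIAIJ and the 2.3-era PetscViewerBinaryOpen()
     calling sequence, typical calls look like

       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);                        ASCII to stdout

       PetscViewer bv;
       ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",FILE_MODE_WRITE,&bv);CHKERRQ(ierr);
       ierr = MatView(mat,bv);CHKERRQ(ierr);                                               binary file
       ierr = PetscViewerDestroy(bv);CHKERRQ(ierr);

     The file name "matrix.dat" is only a placeholder.
  */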
1005 if (iascii || isdraw || isbinary || issocket) { 1006 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1007 } else { 1008 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatRelax_MPIAIJ" 1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1018 { 1019 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1020 PetscErrorCode ierr; 1021 Vec bb1; 1022 1023 PetscFunctionBegin; 1024 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1025 1026 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1027 1028 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1031 its--; 1032 } 1033 1034 while (its--) { 1035 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 1038 /* update rhs: bb1 = bb - B*x */ 1039 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1040 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1041 1042 /* local sweep */ 1043 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1044 CHKERRQ(ierr); 1045 } 1046 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1047 if (flag & SOR_ZERO_INITIAL_GUESS) { 1048 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1049 its--; 1050 } 1051 while (its--) { 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 1055 /* update rhs: bb1 = bb - B*x */ 1056 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1057 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1058 1059 /* local sweep */ 1060 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1061 CHKERRQ(ierr); 1062 } 1063 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1064 if (flag & SOR_ZERO_INITIAL_GUESS) { 1065 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1066 its--; 1067 } 1068 while (its--) { 1069 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 1072 /* update rhs: bb1 = bb - B*x */ 1073 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1074 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1075 1076 /* local sweep */ 1077 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1078 CHKERRQ(ierr); 1079 } 1080 } else { 1081 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1082 } 1083 1084 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatPermute_MPIAIJ" 1090 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1091 { 1092 MPI_Comm comm,pcomm; 1093 PetscInt first,local_size,nrows,*rows; 1094 int ntids; 1095 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1096 PetscErrorCode 
ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1100 /* make a collective version of 'rowp' */ 1101 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1102 if (pcomm==comm) { 1103 crowp = rowp; 1104 } else { 1105 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1106 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1108 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1109 } 1110 /* collect the global row permutation and invert it */ 1111 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1112 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1113 if (pcomm!=comm) { 1114 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1115 } 1116 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1117 /* get the local target indices */ 1118 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1120 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1122 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1123 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1124 /* the column permutation is so much easier; 1125 make a local version of 'colp' and invert it */ 1126 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1127 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1128 if (ntids==1) { 1129 lcolp = colp; 1130 } else { 1131 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1132 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1133 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1134 } 1135 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1136 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1137 if (ntids>1) { 1138 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1139 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1140 } 1141 /* now we just get the submatrix */ 1142 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1143 /* clean up */ 1144 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1145 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1151 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1152 { 1153 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1154 Mat A = mat->A,B = mat->B; 1155 PetscErrorCode ierr; 1156 PetscReal isend[5],irecv[5]; 1157 1158 PetscFunctionBegin; 1159 info->block_size = 1.0; 1160 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1162 isend[3] = info->memory; isend[4] = info->mallocs; 1163 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1164 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1165 isend[3] += info->memory; isend[4] += info->mallocs; 1166 if (flag == MAT_LOCAL) { 1167 info->nz_used = isend[0]; 1168 info->nz_allocated = isend[1]; 1169 info->nz_unneeded = isend[2]; 1170 info->memory = isend[3]; 1171 info->mallocs = isend[4]; 1172 } else if (flag == MAT_GLOBAL_MAX) { 1173 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1174 info->nz_used = irecv[0]; 1175 info->nz_allocated = irecv[1]; 1176 info->nz_unneeded = irecv[2]; 1177 info->memory = irecv[3]; 1178 info->mallocs = irecv[4]; 1179 } else if (flag == 
MAT_GLOBAL_SUM) { 1180 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1181 info->nz_used = irecv[0]; 1182 info->nz_allocated = irecv[1]; 1183 info->nz_unneeded = irecv[2]; 1184 info->memory = irecv[3]; 1185 info->mallocs = irecv[4]; 1186 } 1187 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1188 info->fill_ratio_needed = 0; 1189 info->factor_mallocs = 0; 1190 info->rows_global = (double)matin->rmap.N; 1191 info->columns_global = (double)matin->cmap.N; 1192 info->rows_local = (double)matin->rmap.n; 1193 info->columns_local = (double)matin->cmap.N; 1194 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSetOption_MPIAIJ" 1200 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 switch (op) { 1207 case MAT_NO_NEW_NONZERO_LOCATIONS: 1208 case MAT_YES_NEW_NONZERO_LOCATIONS: 1209 case MAT_COLUMNS_UNSORTED: 1210 case MAT_COLUMNS_SORTED: 1211 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1212 case MAT_KEEP_ZEROED_ROWS: 1213 case MAT_NEW_NONZERO_LOCATION_ERR: 1214 case MAT_USE_INODES: 1215 case MAT_DO_NOT_USE_INODES: 1216 case MAT_IGNORE_ZERO_ENTRIES: 1217 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1219 break; 1220 case MAT_ROW_ORIENTED: 1221 a->roworiented = PETSC_TRUE; 1222 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1223 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1224 break; 1225 case MAT_ROWS_SORTED: 1226 case MAT_ROWS_UNSORTED: 1227 case MAT_YES_NEW_DIAGONALS: 1228 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1229 break; 1230 case MAT_COLUMN_ORIENTED: 1231 a->roworiented = PETSC_FALSE; 1232 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1233 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1234 break; 1235 case MAT_IGNORE_OFF_PROC_ENTRIES: 1236 a->donotstash = PETSC_TRUE; 1237 break; 1238 case MAT_NO_NEW_DIAGONALS: 1239 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1240 case MAT_SYMMETRIC: 1241 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1242 break; 1243 case MAT_STRUCTURALLY_SYMMETRIC: 1244 case MAT_HERMITIAN: 1245 case MAT_SYMMETRY_ETERNAL: 1246 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1247 break; 1248 case MAT_NOT_SYMMETRIC: 1249 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1250 case MAT_NOT_HERMITIAN: 1251 case MAT_NOT_SYMMETRY_ETERNAL: 1252 break; 1253 default: 1254 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetRow_MPIAIJ" 1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1262 { 1263 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1264 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1265 PetscErrorCode ierr; 1266 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1267 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1268 PetscInt *cmap,*idx_p; 1269 1270 PetscFunctionBegin; 1271 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1272 mat->getrowactive = PETSC_TRUE; 1273 1274 if (!mat->rowvalues && (idx || v)) { 1275 /* 1276 allocate enough space to hold information from the longest row. 
1277 */ 1278 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1279 PetscInt max = 1,tmp; 1280 for (i=0; i<matin->rmap.n; i++) { 1281 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1282 if (max < tmp) { max = tmp; } 1283 } 1284 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1285 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1286 } 1287 1288 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1289 lrow = row - rstart; 1290 1291 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1292 if (!v) {pvA = 0; pvB = 0;} 1293 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1294 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1295 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1296 nztot = nzA + nzB; 1297 1298 cmap = mat->garray; 1299 if (v || idx) { 1300 if (nztot) { 1301 /* Sort by increasing column numbers, assuming A and B already sorted */ 1302 PetscInt imark = -1; 1303 if (v) { 1304 *v = v_p = mat->rowvalues; 1305 for (i=0; i<nzB; i++) { 1306 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1307 else break; 1308 } 1309 imark = i; 1310 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1311 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1312 } 1313 if (idx) { 1314 *idx = idx_p = mat->rowindices; 1315 if (imark > -1) { 1316 for (i=0; i<imark; i++) { 1317 idx_p[i] = cmap[cworkB[i]]; 1318 } 1319 } else { 1320 for (i=0; i<nzB; i++) { 1321 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1322 else break; 1323 } 1324 imark = i; 1325 } 1326 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1327 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1328 } 1329 } else { 1330 if (idx) *idx = 0; 1331 if (v) *v = 0; 1332 } 1333 } 1334 *nz = nztot; 1335 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1336 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1345 1346 PetscFunctionBegin; 1347 if (!aij->getrowactive) { 1348 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1349 } 1350 aij->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatNorm_MPIAIJ" 1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1357 { 1358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1359 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1360 PetscErrorCode ierr; 1361 PetscInt i,j,cstart = mat->cmap.rstart; 1362 PetscReal sum = 0.0; 1363 PetscScalar *v; 1364 1365 PetscFunctionBegin; 1366 if (aij->size == 1) { 1367 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1368 } else { 1369 if (type == NORM_FROBENIUS) { 1370 v = amat->a; 1371 for (i=0; i<amat->nz; i++) { 1372 #if defined(PETSC_USE_COMPLEX) 1373 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1374 #else 1375 sum += (*v)*(*v); v++; 1376 #endif 1377 } 1378 v = bmat->a; 1379 for (i=0; i<bmat->nz; i++) { 1380 #if defined(PETSC_USE_COMPLEX) 1381 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1382 #else 1383 sum += (*v)*(*v); v++; 1384 #endif 1385 } 1386 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1387 *norm = sqrt(*norm); 
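      /*
         Sketch of what the branch above computes (added comment, not original): the
         Frobenius norm  ||A||_F = sqrt( sum_ij |a_ij|^2 ),  where each process sums
         |a_ij|^2 over its diagonal (A) and off-diagonal (B) blocks and the partial
         sums are combined with MPI_Allreduce before taking the square root.
      */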
1388 } else if (type == NORM_1) { /* max column norm */ 1389 PetscReal *tmp,*tmp2; 1390 PetscInt *jj,*garray = aij->garray; 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1392 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1393 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1394 *norm = 0.0; 1395 v = amat->a; jj = amat->j; 1396 for (j=0; j<amat->nz; j++) { 1397 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1398 } 1399 v = bmat->a; jj = bmat->j; 1400 for (j=0; j<bmat->nz; j++) { 1401 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1402 } 1403 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1404 for (j=0; j<mat->cmap.N; j++) { 1405 if (tmp2[j] > *norm) *norm = tmp2[j]; 1406 } 1407 ierr = PetscFree(tmp);CHKERRQ(ierr); 1408 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1409 } else if (type == NORM_INFINITY) { /* max row norm */ 1410 PetscReal ntemp = 0.0; 1411 for (j=0; j<aij->A->rmap.n; j++) { 1412 v = amat->a + amat->i[j]; 1413 sum = 0.0; 1414 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1415 sum += PetscAbsScalar(*v); v++; 1416 } 1417 v = bmat->a + bmat->i[j]; 1418 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1419 sum += PetscAbsScalar(*v); v++; 1420 } 1421 if (sum > ntemp) ntemp = sum; 1422 } 1423 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1424 } else { 1425 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1426 } 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatTranspose_MPIAIJ" 1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1434 { 1435 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1436 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1437 PetscErrorCode ierr; 1438 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1439 Mat B; 1440 PetscScalar *array; 1441 1442 PetscFunctionBegin; 1443 if (!matout && M != N) { 1444 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1445 } 1446 1447 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1448 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1449 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1450 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1451 1452 /* copy over the A part */ 1453 Aloc = (Mat_SeqAIJ*)a->A->data; 1454 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1455 row = A->rmap.rstart; 1456 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1457 for (i=0; i<m; i++) { 1458 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1459 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1460 } 1461 aj = Aloc->j; 1462 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1463 1464 /* copy over the B part */ 1465 Aloc = (Mat_SeqAIJ*)a->B->data; 1466 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1467 row = A->rmap.rstart; 1468 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1469 ct = cols; 1470 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1471 for (i=0; i<m; i++) { 1472 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1473 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1474 } 1475 ierr = PetscFree(ct);CHKERRQ(ierr); 1476 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1478 if (matout) { 1479 *matout = B; 1480 } else { 1481 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1482 } 1483 
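  /*
     Usage sketch (added note, not original): callers reach this routine through
     MatTranspose(). With the calling sequence in this PETSc version,

       ierr = MatTranspose(A,&B);CHKERRQ(ierr);           out-of-place, creates B
       ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   in-place, square A only

     matching the matout / in-place branches handled just above.
  */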
PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1489 { 1490 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1491 Mat a = aij->A,b = aij->B; 1492 PetscErrorCode ierr; 1493 PetscInt s1,s2,s3; 1494 1495 PetscFunctionBegin; 1496 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1497 if (rr) { 1498 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1499 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1500 /* Overlap communication with computation. */ 1501 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1502 } 1503 if (ll) { 1504 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1505 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1506 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1507 } 1508 /* scale the diagonal block */ 1509 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1510 1511 if (rr) { 1512 /* Do a scatter end and then right scale the off-diagonal block */ 1513 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1514 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1515 } 1516 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1522 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1523 { 1524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1529 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1534 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1535 { 1536 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatEqual_MPIAIJ" 1546 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1547 { 1548 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1549 Mat a,b,c,d; 1550 PetscTruth flg; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 a = matA->A; b = matA->B; 1555 c = matB->A; d = matB->B; 1556 1557 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1558 if (flg) { 1559 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1560 } 1561 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatCopy_MPIAIJ" 1567 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1568 { 1569 PetscErrorCode ierr; 1570 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1571 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1572 1573 PetscFunctionBegin; 1574 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1575 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1576 /* because of the column compression in the off-processor part of the matrix a->B, 1577 the number of columns in a->B and b->B may be different, hence we cannot call 1578 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1579 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1580 then copying the submatrices */ 1581 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1582 } else { 1583 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1584 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1591 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1592 { 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #include "petscblaslapack.h" 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatAXPY_MPIAIJ" 1603 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1604 { 1605 PetscErrorCode ierr; 1606 PetscInt i; 1607 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1608 PetscBLASInt bnz,one=1; 1609 Mat_SeqAIJ *x,*y; 1610 1611 PetscFunctionBegin; 1612 if (str == SAME_NONZERO_PATTERN) { 1613 PetscScalar alpha = a; 1614 x = (Mat_SeqAIJ *)xx->A->data; 1615 y = (Mat_SeqAIJ *)yy->A->data; 1616 bnz = (PetscBLASInt)x->nz; 1617 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1618 x = (Mat_SeqAIJ *)xx->B->data; 1619 y = (Mat_SeqAIJ *)yy->B->data; 1620 bnz = (PetscBLASInt)x->nz; 1621 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1622 } else if (str == SUBSET_NONZERO_PATTERN) { 1623 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1624 1625 x = (Mat_SeqAIJ *)xx->B->data; 1626 y = (Mat_SeqAIJ *)yy->B->data; 1627 if (y->xtoy && y->XtoY != xx->B) { 1628 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1629 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1630 } 1631 if (!y->xtoy) { /* get xtoy */ 1632 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1633 y->XtoY = xx->B; 1634 } 1635 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1636 } else { 1637 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1638 } 1639 PetscFunctionReturn(0); 1640 } 1641 1642 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatConjugate_MPIAIJ" 1646 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1647 { 1648 #if defined(PETSC_USE_COMPLEX) 1649 PetscErrorCode ierr; 1650 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1651 1652 PetscFunctionBegin; 1653 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1654 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1655 #else 1656 PetscFunctionBegin; 1657 #endif 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatRealPart_MPIAIJ" 1663 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1670 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1676 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1677 { 1678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1679 PetscErrorCode ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1683 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #ifdef PETSC_HAVE_PBGL 1688 1689 #include <boost/parallel/mpi/bsp_process_group.hpp> 1690 #include 
<boost/graph/distributed/ilu_default_graph.hpp> 1691 #include <boost/graph/distributed/ilu_0_block.hpp> 1692 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1693 #include <boost/graph/distributed/petsc/interface.hpp> 1694 #include <boost/multi_array.hpp> 1695 #include <boost/parallel/distributed_property_map.hpp> 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1699 /* 1700 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1701 */ 1702 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1703 { 1704 namespace petsc = boost::distributed::petsc; 1705 1706 namespace graph_dist = boost::graph::distributed; 1707 using boost::graph::distributed::ilu_default::process_group_type; 1708 using boost::graph::ilu_permuted; 1709 1710 PetscTruth row_identity, col_identity; 1711 PetscObjectContainer c; 1712 PetscInt m, n, M, N; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1717 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1718 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1719 if (!row_identity || !col_identity) { 1720 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1721 } 1722 1723 process_group_type pg; 1724 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1725 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1726 lgraph_type& level_graph = *lgraph_p; 1727 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1728 1729 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1730 ilu_permuted(level_graph); 1731 1732 /* put together the new matrix */ 1733 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1734 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1735 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1736 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1737 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1738 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1739 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1740 (*fact)->factor = FACTOR_LU; 1741 1742 ierr = PetscObjectContainerCreate(A->comm, &c); 1743 ierr = PetscObjectContainerSetPointer(c, lgraph_p); 1744 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1750 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1751 { 1752 PetscFunctionBegin; 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatSolve_MPIAIJ" 1758 /* 1759 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1760 */ 1761 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1762 { 1763 namespace graph_dist = boost::graph::distributed; 1764 1765 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1766 lgraph_type* lgraph_p; 1767 PetscObjectContainer c; 1768 PetscErrorCode ierr; 1769 1770 PetscFunctionBegin; 1771 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1772 ierr = PetscObjectContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1773 ierr = VecCopy(b, x); CHKERRQ(ierr); 1774 1775 PetscScalar* array_x; 1776 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1777 PetscInt sx; 
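  /* The calls below wrap the raw PETSc arrays of b and x as boost::multi_array_ref
     views and then as property maps indexed by the ILU graph's vertices, so that
     ilu_set_solve() can apply the ILU factors directly to PETSc's vector storage. */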
1778 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1779 1780 PetscScalar* array_b; 1781 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1782 PetscInt sb; 1783 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1784 1785 lgraph_type& level_graph = *lgraph_p; 1786 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1787 1788 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1789 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1790 ref_x(array_x, boost::extents[num_vertices(graph)]); 1791 1792 typedef boost::iterator_property_map<array_ref_type::iterator, 1793 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1794 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1795 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1796 1797 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1798 1799 PetscFunctionReturn(0); 1800 } 1801 #endif 1802 1803 /* -------------------------------------------------------------------*/ 1804 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1805 MatGetRow_MPIAIJ, 1806 MatRestoreRow_MPIAIJ, 1807 MatMult_MPIAIJ, 1808 /* 4*/ MatMultAdd_MPIAIJ, 1809 MatMultTranspose_MPIAIJ, 1810 MatMultTransposeAdd_MPIAIJ, 1811 #ifdef PETSC_HAVE_PBGL 1812 MatSolve_MPIAIJ, 1813 #else 1814 0, 1815 #endif 1816 0, 1817 0, 1818 /*10*/ 0, 1819 0, 1820 0, 1821 MatRelax_MPIAIJ, 1822 MatTranspose_MPIAIJ, 1823 /*15*/ MatGetInfo_MPIAIJ, 1824 MatEqual_MPIAIJ, 1825 MatGetDiagonal_MPIAIJ, 1826 MatDiagonalScale_MPIAIJ, 1827 MatNorm_MPIAIJ, 1828 /*20*/ MatAssemblyBegin_MPIAIJ, 1829 MatAssemblyEnd_MPIAIJ, 1830 0, 1831 MatSetOption_MPIAIJ, 1832 MatZeroEntries_MPIAIJ, 1833 /*25*/ MatZeroRows_MPIAIJ, 1834 0, 1835 #ifdef PETSC_HAVE_PBGL 1836 MatLUFactorNumeric_MPIAIJ, 1837 #else 1838 0, 1839 #endif 1840 0, 1841 0, 1842 /*30*/ MatSetUpPreallocation_MPIAIJ, 1843 #ifdef PETSC_HAVE_PBGL 1844 MatILUFactorSymbolic_MPIAIJ, 1845 #else 1846 0, 1847 #endif 1848 0, 1849 0, 1850 0, 1851 /*35*/ MatDuplicate_MPIAIJ, 1852 0, 1853 0, 1854 0, 1855 0, 1856 /*40*/ MatAXPY_MPIAIJ, 1857 MatGetSubMatrices_MPIAIJ, 1858 MatIncreaseOverlap_MPIAIJ, 1859 MatGetValues_MPIAIJ, 1860 MatCopy_MPIAIJ, 1861 /*45*/ 0, 1862 MatScale_MPIAIJ, 1863 0, 1864 0, 1865 0, 1866 /*50*/ MatSetBlockSize_MPIAIJ, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /*55*/ MatFDColoringCreate_MPIAIJ, 1872 0, 1873 MatSetUnfactored_MPIAIJ, 1874 MatPermute_MPIAIJ, 1875 0, 1876 /*60*/ MatGetSubMatrix_MPIAIJ, 1877 MatDestroy_MPIAIJ, 1878 MatView_MPIAIJ, 1879 0, 1880 0, 1881 /*65*/ 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 /*70*/ 0, 1887 0, 1888 MatSetColoring_MPIAIJ, 1889 #if defined(PETSC_HAVE_ADIC) 1890 MatSetValuesAdic_MPIAIJ, 1891 #else 1892 0, 1893 #endif 1894 MatSetValuesAdifor_MPIAIJ, 1895 /*75*/ 0, 1896 0, 1897 0, 1898 0, 1899 0, 1900 /*80*/ 0, 1901 0, 1902 0, 1903 0, 1904 /*84*/ MatLoad_MPIAIJ, 1905 0, 1906 0, 1907 0, 1908 0, 1909 0, 1910 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1911 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1912 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1913 MatPtAP_Basic, 1914 MatPtAPSymbolic_MPIAIJ, 1915 /*95*/ MatPtAPNumeric_MPIAIJ, 1916 0, 1917 0, 1918 0, 1919 0, 1920 /*100*/0, 1921 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1922 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1923 MatConjugate_MPIAIJ, 1924 0, 1925 /*105*/MatSetValuesRow_MPIAIJ, 1926 MatRealPart_MPIAIJ, 1927 MatImaginaryPart_MPIAIJ}; 1928 1929 /* ----------------------------------------------------------------------------------------*/ 1930 1931 EXTERN_C_BEGIN 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatStoreValues_MPIAIJ" 
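/*
    MatStoreValues_MPIAIJ() and MatRetrieveValues_MPIAIJ() below simply forward to the
    SeqAIJ routines on the diagonal (aij->A) and off-diagonal (aij->B) blocks; the
    numerical values are stashed in, and later restored from, a private copy so that a
    matrix whose nonzero pattern does not change can be refilled cheaply (see the
    MatStoreValues() manual page for the usage restrictions).
*/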
1934 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1935 { 1936 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1941 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 EXTERN_C_END 1945 1946 EXTERN_C_BEGIN 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1949 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1950 { 1951 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1952 PetscErrorCode ierr; 1953 1954 PetscFunctionBegin; 1955 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1956 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 EXTERN_C_END 1960 1961 #include "petscpc.h" 1962 EXTERN_C_BEGIN 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1965 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1966 { 1967 Mat_MPIAIJ *b; 1968 PetscErrorCode ierr; 1969 PetscInt i; 1970 1971 PetscFunctionBegin; 1972 B->preallocated = PETSC_TRUE; 1973 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1974 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1975 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1976 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1977 1978 B->rmap.bs = B->cmap.bs = 1; 1979 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1980 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1981 if (d_nnz) { 1982 for (i=0; i<B->rmap.n; i++) { 1983 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1984 } 1985 } 1986 if (o_nnz) { 1987 for (i=0; i<B->rmap.n; i++) { 1988 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1989 } 1990 } 1991 b = (Mat_MPIAIJ*)B->data; 1992 1993 /* Explicitly create 2 MATSEQAIJ matrices. 
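      b->A will hold the square "diagonal" block (local rows by local columns) and b->B the
      off-diagonal block; b->B is created here with the full global column width and is
      compressed to only the columns it actually uses (recorded in garray) during assembly.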
*/ 1994 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1995 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1996 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1997 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1998 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1999 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2000 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2001 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2002 2003 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2004 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2005 2006 PetscFunctionReturn(0); 2007 } 2008 EXTERN_C_END 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2012 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2013 { 2014 Mat mat; 2015 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 *newmat = 0; 2020 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2021 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2022 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2023 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2024 a = (Mat_MPIAIJ*)mat->data; 2025 2026 mat->factor = matin->factor; 2027 mat->rmap.bs = matin->rmap.bs; 2028 mat->assembled = PETSC_TRUE; 2029 mat->insertmode = NOT_SET_VALUES; 2030 mat->preallocated = PETSC_TRUE; 2031 2032 a->size = oldmat->size; 2033 a->rank = oldmat->rank; 2034 a->donotstash = oldmat->donotstash; 2035 a->roworiented = oldmat->roworiented; 2036 a->rowindices = 0; 2037 a->rowvalues = 0; 2038 a->getrowactive = PETSC_FALSE; 2039 2040 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2041 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2042 2043 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2044 if (oldmat->colmap) { 2045 #if defined (PETSC_USE_CTABLE) 2046 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2047 #else 2048 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2049 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2050 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2051 #endif 2052 } else a->colmap = 0; 2053 if (oldmat->garray) { 2054 PetscInt len; 2055 len = oldmat->B->cmap.n; 2056 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2057 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2058 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2059 } else a->garray = 0; 2060 2061 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2062 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2063 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2064 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2065 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2066 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2067 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2068 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2069 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2070 *newmat = mat; 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #include "petscsys.h" 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatLoad_MPIAIJ" 2078 
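/*
    MatLoad_MPIAIJ() below reads a matrix in PETSc binary format from the viewer:
    process 0 reads the header, the row lengths, the column indices and the numerical
    values, and ships each process its block of rows; every process then preallocates
    with MatMPIAIJSetPreallocation() and fills its rows with MatSetValues_MPIAIJ().
*/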
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2079 { 2080 Mat A; 2081 PetscScalar *vals,*svals; 2082 MPI_Comm comm = ((PetscObject)viewer)->comm; 2083 MPI_Status status; 2084 PetscErrorCode ierr; 2085 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2086 PetscInt i,nz,j,rstart,rend,mmax; 2087 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2088 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2089 PetscInt cend,cstart,n,*rowners; 2090 int fd; 2091 2092 PetscFunctionBegin; 2093 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2094 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2095 if (!rank) { 2096 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2097 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2098 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2099 } 2100 2101 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2102 M = header[1]; N = header[2]; 2103 /* determine ownership of all rows */ 2104 m = M/size + ((M % size) > rank); 2105 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2106 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2107 2108 /* First process needs enough room for process with most rows */ 2109 if (!rank) { 2110 mmax = rowners[1]; 2111 for (i=2; i<size; i++) { 2112 mmax = PetscMax(mmax,rowners[i]); 2113 } 2114 } else mmax = m; 2115 2116 rowners[0] = 0; 2117 for (i=2; i<=size; i++) { 2118 mmax = PetscMax(mmax,rowners[i]); 2119 rowners[i] += rowners[i-1]; 2120 } 2121 rstart = rowners[rank]; 2122 rend = rowners[rank+1]; 2123 2124 /* distribute row lengths to all processors */ 2125 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2126 if (!rank) { 2127 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2128 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2129 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2130 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2131 for (j=0; j<m; j++) { 2132 procsnz[0] += ourlens[j]; 2133 } 2134 for (i=1; i<size; i++) { 2135 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2136 /* calculate the number of nonzeros on each processor */ 2137 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2138 procsnz[i] += rowlengths[j]; 2139 } 2140 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2141 } 2142 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2143 } else { 2144 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2145 } 2146 2147 if (!rank) { 2148 /* determine max buffer needed and allocate it */ 2149 maxnz = 0; 2150 for (i=0; i<size; i++) { 2151 maxnz = PetscMax(maxnz,procsnz[i]); 2152 } 2153 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2154 2155 /* read in my part of the matrix column indices */ 2156 nz = procsnz[0]; 2157 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2158 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2159 2160 /* read in every one elses and ship off */ 2161 for (i=1; i<size; i++) { 2162 nz = procsnz[i]; 2163 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2164 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2165 } 2166 ierr = PetscFree(cols);CHKERRQ(ierr); 2167 } else { 2168 /* determine buffer space needed for message */ 2169 nz = 0; 2170 for (i=0; 
i<m; i++) { 2171 nz += ourlens[i]; 2172 } 2173 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2174 2175 /* receive message of column indices*/ 2176 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2177 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2178 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2179 } 2180 2181 /* determine column ownership if matrix is not square */ 2182 if (N != M) { 2183 n = N/size + ((N % size) > rank); 2184 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2185 cstart = cend - n; 2186 } else { 2187 cstart = rstart; 2188 cend = rend; 2189 n = cend - cstart; 2190 } 2191 2192 /* loop over local rows, determining number of off diagonal entries */ 2193 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2194 jj = 0; 2195 for (i=0; i<m; i++) { 2196 for (j=0; j<ourlens[i]; j++) { 2197 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2198 jj++; 2199 } 2200 } 2201 2202 /* create our matrix */ 2203 for (i=0; i<m; i++) { 2204 ourlens[i] -= offlens[i]; 2205 } 2206 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2207 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2208 ierr = MatSetType(A,type);CHKERRQ(ierr); 2209 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2210 2211 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2212 for (i=0; i<m; i++) { 2213 ourlens[i] += offlens[i]; 2214 } 2215 2216 if (!rank) { 2217 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2218 2219 /* read in my part of the matrix numerical values */ 2220 nz = procsnz[0]; 2221 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2222 2223 /* insert into matrix */ 2224 jj = rstart; 2225 smycols = mycols; 2226 svals = vals; 2227 for (i=0; i<m; i++) { 2228 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2229 smycols += ourlens[i]; 2230 svals += ourlens[i]; 2231 jj++; 2232 } 2233 2234 /* read in other processors and ship out */ 2235 for (i=1; i<size; i++) { 2236 nz = procsnz[i]; 2237 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2238 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2239 } 2240 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2241 } else { 2242 /* receive numeric values */ 2243 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2244 2245 /* receive message of values*/ 2246 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2247 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2248 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2249 2250 /* insert into matrix */ 2251 jj = rstart; 2252 smycols = mycols; 2253 svals = vals; 2254 for (i=0; i<m; i++) { 2255 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2256 smycols += ourlens[i]; 2257 svals += ourlens[i]; 2258 jj++; 2259 } 2260 } 2261 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2262 ierr = PetscFree(vals);CHKERRQ(ierr); 2263 ierr = PetscFree(mycols);CHKERRQ(ierr); 2264 ierr = PetscFree(rowners);CHKERRQ(ierr); 2265 2266 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2267 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2268 *newmat = A; 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2274 /* 2275 Not great since it makes two copies of the submatrix, first an SeqAIJ 2276 in local and 
then by concatenating the local matrices the end result. 2277 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2278 */ 2279 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2280 { 2281 PetscErrorCode ierr; 2282 PetscMPIInt rank,size; 2283 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2284 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2285 Mat *local,M,Mreuse; 2286 PetscScalar *vwork,*aa; 2287 MPI_Comm comm = mat->comm; 2288 Mat_SeqAIJ *aij; 2289 2290 2291 PetscFunctionBegin; 2292 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2293 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2294 2295 if (call == MAT_REUSE_MATRIX) { 2296 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2297 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2298 local = &Mreuse; 2299 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2300 } else { 2301 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2302 Mreuse = *local; 2303 ierr = PetscFree(local);CHKERRQ(ierr); 2304 } 2305 2306 /* 2307 m - number of local rows 2308 n - number of columns (same on all processors) 2309 rstart - first row in new global matrix generated 2310 */ 2311 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2312 if (call == MAT_INITIAL_MATRIX) { 2313 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2314 ii = aij->i; 2315 jj = aij->j; 2316 2317 /* 2318 Determine the number of non-zeros in the diagonal and off-diagonal 2319 portions of the matrix in order to do correct preallocation 2320 */ 2321 2322 /* first get start and end of "diagonal" columns */ 2323 if (csize == PETSC_DECIDE) { 2324 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2325 if (mglobal == n) { /* square matrix */ 2326 nlocal = m; 2327 } else { 2328 nlocal = n/size + ((n % size) > rank); 2329 } 2330 } else { 2331 nlocal = csize; 2332 } 2333 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2334 rstart = rend - nlocal; 2335 if (rank == size - 1 && rend != n) { 2336 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2337 } 2338 2339 /* next, compute all the lengths */ 2340 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2341 olens = dlens + m; 2342 for (i=0; i<m; i++) { 2343 jend = ii[i+1] - ii[i]; 2344 olen = 0; 2345 dlen = 0; 2346 for (j=0; j<jend; j++) { 2347 if (*jj < rstart || *jj >= rend) olen++; 2348 else dlen++; 2349 jj++; 2350 } 2351 olens[i] = olen; 2352 dlens[i] = dlen; 2353 } 2354 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2355 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2356 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2357 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2358 ierr = PetscFree(dlens);CHKERRQ(ierr); 2359 } else { 2360 PetscInt ml,nl; 2361 2362 M = *newmat; 2363 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2364 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2365 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2366 /* 2367 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2368 rather than the slower MatSetValues(). 
2369 */ 2370 M->was_assembled = PETSC_TRUE; 2371 M->assembled = PETSC_FALSE; 2372 } 2373 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2374 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2375 ii = aij->i; 2376 jj = aij->j; 2377 aa = aij->a; 2378 for (i=0; i<m; i++) { 2379 row = rstart + i; 2380 nz = ii[i+1] - ii[i]; 2381 cwork = jj; jj += nz; 2382 vwork = aa; aa += nz; 2383 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2384 } 2385 2386 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 *newmat = M; 2389 2390 /* save submatrix used in processor for next request */ 2391 if (call == MAT_INITIAL_MATRIX) { 2392 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2393 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2394 } 2395 2396 PetscFunctionReturn(0); 2397 } 2398 2399 EXTERN_C_BEGIN 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2402 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2403 { 2404 PetscInt m,cstart, cend,j,nnz,i,d; 2405 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2406 const PetscInt *JJ; 2407 PetscScalar *values; 2408 PetscErrorCode ierr; 2409 2410 PetscFunctionBegin; 2411 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2412 2413 B->rmap.bs = B->cmap.bs = 1; 2414 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2415 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2416 m = B->rmap.n; 2417 cstart = B->cmap.rstart; 2418 cend = B->cmap.rend; 2419 rstart = B->rmap.rstart; 2420 2421 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2422 o_nnz = d_nnz + m; 2423 2424 for (i=0; i<m; i++) { 2425 nnz = Ii[i+1]- Ii[i]; 2426 JJ = J + Ii[i]; 2427 nnz_max = PetscMax(nnz_max,nnz); 2428 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2429 for (j=0; j<nnz; j++) { 2430 if (*JJ >= cstart) break; 2431 JJ++; 2432 } 2433 d = 0; 2434 for (; j<nnz; j++) { 2435 if (*JJ++ >= cend) break; 2436 d++; 2437 } 2438 d_nnz[i] = d; 2439 o_nnz[i] = nnz - d; 2440 } 2441 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2442 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2443 2444 if (v) values = (PetscScalar*)v; 2445 else { 2446 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2447 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2448 } 2449 2450 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2451 for (i=0; i<m; i++) { 2452 ii = i + rstart; 2453 nnz = Ii[i+1]- Ii[i]; 2454 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2455 } 2456 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2457 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2458 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2459 2460 if (!v) { 2461 ierr = PetscFree(values);CHKERRQ(ierr); 2462 } 2463 PetscFunctionReturn(0); 2464 } 2465 EXTERN_C_END 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2469 /*@ 2470 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2471 (the default parallel PETSc format). 2472 2473 Collective on MPI_Comm 2474 2475 Input Parameters: 2476 + B - the matrix 2477 . 
i - the indices into j for the start of each local row (starts with zero) 2478 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2479 - v - optional values in the matrix 2480 2481 Level: developer 2482 2483 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2484 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2485 2486 .keywords: matrix, aij, compressed row, sparse, parallel 2487 2488 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2489 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2490 @*/ 2491 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2492 { 2493 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2494 2495 PetscFunctionBegin; 2496 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2497 if (f) { 2498 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2499 } 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2505 /*@C 2506 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2507 (the default parallel PETSc format). For good matrix assembly performance 2508 the user should preallocate the matrix storage by setting the parameters 2509 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2510 performance can be increased by more than a factor of 50. 2511 2512 Collective on MPI_Comm 2513 2514 Input Parameters: 2515 + A - the matrix 2516 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2517 (same value is used for all local rows) 2518 . d_nnz - array containing the number of nonzeros in the various rows of the 2519 DIAGONAL portion of the local submatrix (possibly different for each row) 2520 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2521 The size of this array is equal to the number of local rows, i.e 'm'. 2522 You must leave room for the diagonal entry even if it is zero. 2523 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2524 submatrix (same value is used for all local rows). 2525 - o_nnz - array containing the number of nonzeros in the various rows of the 2526 OFF-DIAGONAL portion of the local submatrix (possibly different for 2527 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2528 structure. The size of this array is equal to the number 2529 of local rows, i.e 'm'. 2530 2531 If the *_nnz parameter is given then the *_nz parameter is ignored 2532 2533 The AIJ format (also called the Yale sparse matrix format or 2534 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2535 storage. The stored row and column indices begin with zero. See the users manual for details. 2536 2537 The parallel matrix is partitioned such that the first m0 rows belong to 2538 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2539 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
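
   As a rough sketch of a typical call sequence (illustrative only; comm and ierr are
   assumed to be declared by the caller, and the d_nnz/o_nnz values are the ones worked
   out for proc1 above):

.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);   /* proc1's 3 local rows of the 8x8 matrix */
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve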
2618 2619 Level: intermediate 2620 2621 .keywords: matrix, aij, compressed row, sparse, parallel 2622 2623 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2624 MPIAIJ 2625 @*/ 2626 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2627 { 2628 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2629 2630 PetscFunctionBegin; 2631 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2632 if (f) { 2633 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2634 } 2635 PetscFunctionReturn(0); 2636 } 2637 2638 #undef __FUNCT__ 2639 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2640 /*@C 2641 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 2642 CSR format the local rows. 2643 2644 Collective on MPI_Comm 2645 2646 Input Parameters: 2647 + comm - MPI communicator 2648 . m - number of local rows (Cannot be PETSC_DECIDE) 2649 . n - This value should be the same as the local size used in creating the 2650 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2651 calculated if N is given) For square matrices n is almost always m. 2652 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2653 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2654 . i - row indices 2655 . j - column indices 2656 - a - matrix values 2657 2658 Output Parameter: 2659 . mat - the matrix 2660 2661 Level: intermediate 2662 2663 Notes: 2664 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2665 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2666 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2667 2668 The i and j indices are 0 based 2669 2670 .keywords: matrix, aij, compressed row, sparse, parallel 2671 2672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2673 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2674 @*/ 2675 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2676 { 2677 PetscErrorCode ierr; 2678 2679 PetscFunctionBegin; 2680 if (i[0]) { 2681 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2682 } 2683 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2684 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2685 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2686 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatCreateMPIAIJ" 2692 /*@C 2693 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2694 (the default parallel PETSc format). For good matrix assembly performance 2695 the user should preallocate the matrix storage by setting the parameters 2696 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2697 performance can be increased by more than a factor of 50. 2698 2699 Collective on MPI_Comm 2700 2701 Input Parameters: 2702 + comm - MPI communicator 2703 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2704 This value should be the same as the local size used in creating the 2705 y vector for the matrix-vector product y = Ax. 2706 . n - This value should be the same as the local size used in creating the 2707 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2708 calculated if N is given) For square matrices n is almost always m. 2709 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2710 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2711 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2712 (same value is used for all local rows) 2713 . d_nnz - array containing the number of nonzeros in the various rows of the 2714 DIAGONAL portion of the local submatrix (possibly different for each row) 2715 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2716 The size of this array is equal to the number of local rows, i.e 'm'. 2717 You must leave room for the diagonal entry even if it is zero. 2718 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2719 submatrix (same value is used for all local rows). 2720 - o_nnz - array containing the number of nonzeros in the various rows of the 2721 OFF-DIAGONAL portion of the local submatrix (possibly different for 2722 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2723 structure. The size of this array is equal to the number 2724 of local rows, i.e 'm'. 2725 2726 Output Parameter: 2727 . A - the matrix 2728 2729 Notes: 2730 If the *_nnz parameter is given then the *_nz parameter is ignored 2731 2732 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2733 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2734 storage requirements for this matrix. 2735 2736 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2737 processor than it must be used on all processors that share the object for 2738 that argument. 2739 2740 The user MUST specify either the local or global matrix dimensions 2741 (possibly both). 2742 2743 The parallel matrix is partitioned such that the first m0 rows belong to 2744 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2745 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2746 2747 The DIAGONAL portion of the local submatrix of a processor can be defined 2748 as the submatrix which is obtained by extraction the part corresponding 2749 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2750 first row that belongs to the processor, and r2 is the last row belonging 2751 to the this processor. This is a square mxm matrix. The remaining portion 2752 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2753 2754 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2755 2756 When calling this routine with a single process communicator, a matrix of 2757 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2758 type of communicator, use the construction mechanism: 2759 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2760 2761 By default, this format uses inodes (identical nodes) when possible. 2762 We search for consecutive rows with the same nonzero structure, thereby 2763 reusing matrix information to achieve increased efficiency. 

   Options Database Keys:
+  -mat_no_inode            - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex        - Internally use indexing starting at 1
                              rather than 0. Note that when calling MatSetValues(),
                              the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
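
   A corresponding call sketch (illustrative only; comm and ierr are assumed to be
   declared by the caller; shown for proc1, which owns 3 rows):

.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve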
2841 2842 Level: intermediate 2843 2844 .keywords: matrix, aij, compressed row, sparse, parallel 2845 2846 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2847 MPIAIJ, MatCreateMPIAIJWithArrays() 2848 @*/ 2849 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2850 { 2851 PetscErrorCode ierr; 2852 PetscMPIInt size; 2853 2854 PetscFunctionBegin; 2855 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2856 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2857 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2858 if (size > 1) { 2859 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2860 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2861 } else { 2862 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2863 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2864 } 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2870 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2871 { 2872 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2873 2874 PetscFunctionBegin; 2875 *Ad = a->A; 2876 *Ao = a->B; 2877 *colmap = a->garray; 2878 PetscFunctionReturn(0); 2879 } 2880 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2883 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2884 { 2885 PetscErrorCode ierr; 2886 PetscInt i; 2887 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2888 2889 PetscFunctionBegin; 2890 if (coloring->ctype == IS_COLORING_GLOBAL) { 2891 ISColoringValue *allcolors,*colors; 2892 ISColoring ocoloring; 2893 2894 /* set coloring for diagonal portion */ 2895 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2896 2897 /* set coloring for off-diagonal portion */ 2898 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2899 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2900 for (i=0; i<a->B->cmap.n; i++) { 2901 colors[i] = allcolors[a->garray[i]]; 2902 } 2903 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2904 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2905 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2906 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2907 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2908 ISColoringValue *colors; 2909 PetscInt *larray; 2910 ISColoring ocoloring; 2911 2912 /* set coloring for diagonal portion */ 2913 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2914 for (i=0; i<a->A->cmap.n; i++) { 2915 larray[i] = i + A->cmap.rstart; 2916 } 2917 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2918 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2919 for (i=0; i<a->A->cmap.n; i++) { 2920 colors[i] = coloring->colors[larray[i]]; 2921 } 2922 ierr = PetscFree(larray);CHKERRQ(ierr); 2923 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2924 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2925 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2926 2927 /* set coloring for off-diagonal portion */ 2928 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2929 ierr 
= ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2930 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2931 for (i=0; i<a->B->cmap.n; i++) { 2932 colors[i] = coloring->colors[larray[i]]; 2933 } 2934 ierr = PetscFree(larray);CHKERRQ(ierr); 2935 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2936 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2937 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2938 } else { 2939 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2940 } 2941 2942 PetscFunctionReturn(0); 2943 } 2944 2945 #if defined(PETSC_HAVE_ADIC) 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2948 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2949 { 2950 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2951 PetscErrorCode ierr; 2952 2953 PetscFunctionBegin; 2954 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2955 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2956 PetscFunctionReturn(0); 2957 } 2958 #endif 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2962 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2963 { 2964 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2965 PetscErrorCode ierr; 2966 2967 PetscFunctionBegin; 2968 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2969 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatMerge" 2975 /*@C 2976 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2977 matrices from each processor 2978 2979 Collective on MPI_Comm 2980 2981 Input Parameters: 2982 + comm - the communicators the parallel matrix will live on 2983 . inmat - the input sequential matrices 2984 . n - number of local columns (or PETSC_DECIDE) 2985 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2986 2987 Output Parameter: 2988 . outmat - the parallel matrix generated 2989 2990 Level: advanced 2991 2992 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
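
   A rough sketch of the intended usage (illustrative only; each process is assumed to
   have assembled its own sequential matrix seqmat with the same number of columns):

.vb
      Mat seqmat,C;
      /* ... build and assemble the local SEQAIJ matrix seqmat ... */
      ierr = MatMerge(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      /* seqmat is destroyed inside MatMerge(); C is the resulting MPIAIJ matrix */
.ve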
2993 2994 @*/ 2995 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2996 { 2997 PetscErrorCode ierr; 2998 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 2999 PetscInt *indx; 3000 PetscScalar *values; 3001 3002 PetscFunctionBegin; 3003 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3004 if (scall == MAT_INITIAL_MATRIX){ 3005 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3006 if (n == PETSC_DECIDE){ 3007 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3008 } 3009 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3010 rstart -= m; 3011 3012 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3013 for (i=0;i<m;i++) { 3014 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3015 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3016 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3017 } 3018 /* This routine will ONLY return MPIAIJ type matrix */ 3019 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3020 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3021 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3022 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3023 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3024 3025 } else if (scall == MAT_REUSE_MATRIX){ 3026 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3027 } else { 3028 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3029 } 3030 3031 for (i=0;i<m;i++) { 3032 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3033 Ii = i + rstart; 3034 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3035 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3036 } 3037 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3038 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 3041 PetscFunctionReturn(0); 3042 } 3043 3044 #undef __FUNCT__ 3045 #define __FUNCT__ "MatFileSplit" 3046 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3047 { 3048 PetscErrorCode ierr; 3049 PetscMPIInt rank; 3050 PetscInt m,N,i,rstart,nnz; 3051 size_t len; 3052 const PetscInt *indx; 3053 PetscViewer out; 3054 char *name; 3055 Mat B; 3056 const PetscScalar *values; 3057 3058 PetscFunctionBegin; 3059 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3060 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3061 /* Should this be the type of the diagonal block of A? 
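      For now we always write a plain MATSEQAIJ matrix of size m x N holding this
      process's complete local rows.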
*/ 3062 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3063 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3064 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3065 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3066 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3067 for (i=0;i<m;i++) { 3068 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3069 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3070 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3071 } 3072 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 3075 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3076 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3077 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3078 sprintf(name,"%s.%d",outfile,rank); 3079 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3080 ierr = PetscFree(name); 3081 ierr = MatView(B,out);CHKERRQ(ierr); 3082 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3083 ierr = MatDestroy(B);CHKERRQ(ierr); 3084 PetscFunctionReturn(0); 3085 } 3086 3087 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3088 #undef __FUNCT__ 3089 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3090 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3091 { 3092 PetscErrorCode ierr; 3093 Mat_Merge_SeqsToMPI *merge; 3094 PetscObjectContainer container; 3095 3096 PetscFunctionBegin; 3097 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3098 if (container) { 3099 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3100 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3101 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3102 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3103 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3104 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3105 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3106 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3107 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3108 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3109 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3110 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3111 3112 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 3113 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3114 } 3115 ierr = PetscFree(merge);CHKERRQ(ierr); 3116 3117 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3118 PetscFunctionReturn(0); 3119 } 3120 3121 #include "src/mat/utils/freespace.h" 3122 #include "petscbt.h" 3123 static PetscEvent logkey_seqstompinum = 0; 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3126 /*@C 3127 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3128 matrices from each processor 3129 3130 Collective on MPI_Comm 3131 3132 Input Parameters: 3133 + comm - the communicators the parallel matrix will live on 3134 . seqmat - the input sequential matrices 3135 . m - number of local rows (or PETSC_DECIDE) 3136 . n - number of local columns (or PETSC_DECIDE) 3137 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3138 3139 Output Parameter: 3140 . mpimat - the parallel matrix generated 3141 3142 Level: advanced 3143 3144 Notes: 3145 The dimensions of the sequential matrix in each processor MUST be the same. 
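
   The merge is done in two phases; a sketch of the intended calling sequence
   (illustrative only) is:

.vb
      Mat mpimat;
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve
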
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm=mpimat->comm;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a,**abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (!logkey_seqstompinum) {
    ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
  }
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap.range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap.n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi =
ai[arow+1] - ai[arow]; 3232 aj = a->j + ai[arow]; 3233 aa = a->a + ai[arow]; 3234 nextaj = 0; 3235 for (j=0; nextaj<anzi; j++){ 3236 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3237 ba_i[j] += aa[nextaj++]; 3238 } 3239 } 3240 3241 /* add received vals into ba */ 3242 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3243 /* i-th row */ 3244 if (i == *nextrow[k]) { 3245 anzi = *(nextai[k]+1) - *nextai[k]; 3246 aj = buf_rj[k] + *(nextai[k]); 3247 aa = abuf_r[k] + *(nextai[k]); 3248 nextaj = 0; 3249 for (j=0; nextaj<anzi; j++){ 3250 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3251 ba_i[j] += aa[nextaj++]; 3252 } 3253 } 3254 nextrow[k]++; nextai[k]++; 3255 } 3256 } 3257 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3258 } 3259 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3260 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3261 3262 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3263 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3264 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3265 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3266 PetscFunctionReturn(0); 3267 } 3268 3269 static PetscEvent logkey_seqstompisym = 0; 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3272 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3273 { 3274 PetscErrorCode ierr; 3275 Mat B_mpi; 3276 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3277 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3278 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3279 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3280 PetscInt len,proc,*dnz,*onz; 3281 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3282 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3283 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3284 MPI_Status *status; 3285 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3286 PetscBT lnkbt; 3287 Mat_Merge_SeqsToMPI *merge; 3288 PetscObjectContainer container; 3289 3290 PetscFunctionBegin; 3291 if (!logkey_seqstompisym) { 3292 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3293 } 3294 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3295 3296 /* make sure it is a PETSc comm */ 3297 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3298 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3299 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3300 3301 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3302 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3303 3304 /* determine row ownership */ 3305 /*---------------------------------------------------------*/ 3306 merge->rowmap.n = m; 3307 merge->rowmap.N = M; 3308 merge->rowmap.bs = 1; 3309 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3310 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3311 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3312 3313 m = merge->rowmap.n; 3314 M = merge->rowmap.N; 3315 owners = merge->rowmap.range; 3316 3317 /* determine the number of messages to send, their lengths */ 3318 /*---------------------------------------------------------*/ 3319 len_s = merge->len_s; 3320 3321 len = 0; /* length of buf_si[] */ 3322 merge->nsend = 0; 3323 for (proc=0; proc<size; proc++){ 3324 len_si[proc] = 0; 3325 if (proc == rank){ 3326 
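/* nothing is sent to self: the locally owned rows of seqmat are used directly in the merge */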
len_s[proc] = 0; 3327 } else { 3328 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3329 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3330 } 3331 if (len_s[proc]) { 3332 merge->nsend++; 3333 nrows = 0; 3334 for (i=owners[proc]; i<owners[proc+1]; i++){ 3335 if (ai[i+1] > ai[i]) nrows++; 3336 } 3337 len_si[proc] = 2*(nrows+1); 3338 len += len_si[proc]; 3339 } 3340 } 3341 3342 /* determine the number and length of messages to receive for ij-structure */ 3343 /*-------------------------------------------------------------------------*/ 3344 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3345 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3346 3347 /* post the Irecv of j-structure */ 3348 /*-------------------------------*/ 3349 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3350 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3351 3352 /* post the Isend of j-structure */ 3353 /*--------------------------------*/ 3354 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3355 sj_waits = si_waits + merge->nsend; 3356 3357 for (proc=0, k=0; proc<size; proc++){ 3358 if (!len_s[proc]) continue; 3359 i = owners[proc]; 3360 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3361 k++; 3362 } 3363 3364 /* receives and sends of j-structure are complete */ 3365 /*------------------------------------------------*/ 3366 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3367 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3368 3369 /* send and recv i-structure */ 3370 /*---------------------------*/ 3371 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3372 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3373 3374 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3375 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3376 for (proc=0,k=0; proc<size; proc++){ 3377 if (!len_s[proc]) continue; 3378 /* form outgoing message for i-structure: 3379 buf_si[0]: nrows to be sent 3380 [1:nrows]: row index (global) 3381 [nrows+1:2*nrows+1]: i-structure index 3382 */ 3383 /*-------------------------------------------*/ 3384 nrows = len_si[proc]/2 - 1; 3385 buf_si_i = buf_si + nrows+1; 3386 buf_si[0] = nrows; 3387 buf_si_i[0] = 0; 3388 nrows = 0; 3389 for (i=owners[proc]; i<owners[proc+1]; i++){ 3390 anzi = ai[i+1] - ai[i]; 3391 if (anzi) { 3392 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3393 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3394 nrows++; 3395 } 3396 } 3397 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3398 k++; 3399 buf_si += len_si[proc]; 3400 } 3401 3402 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3403 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3404 3405 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3406 for (i=0; i<merge->nrecv; i++){ 3407 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3408 } 3409 3410 ierr = PetscFree(len_si);CHKERRQ(ierr); 3411 ierr = 
PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr =
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3499 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3500 3501 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3502 B_mpi->assembled = PETSC_FALSE; 3503 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3504 merge->bi = bi; 3505 merge->bj = bj; 3506 merge->buf_ri = buf_ri; 3507 merge->buf_rj = buf_rj; 3508 merge->coi = PETSC_NULL; 3509 merge->coj = PETSC_NULL; 3510 merge->owners_co = PETSC_NULL; 3511 3512 /* attach the supporting struct to B_mpi for reuse */ 3513 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3514 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3515 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3516 *mpimat = B_mpi; 3517 3518 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3519 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 3523 static PetscEvent logkey_seqstompi = 0; 3524 #undef __FUNCT__ 3525 #define __FUNCT__ "MatMerge_SeqsToMPI" 3526 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3527 { 3528 PetscErrorCode ierr; 3529 3530 PetscFunctionBegin; 3531 if (!logkey_seqstompi) { 3532 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3533 } 3534 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3535 if (scall == MAT_INITIAL_MATRIX){ 3536 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3537 } 3538 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3539 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3540 PetscFunctionReturn(0); 3541 } 3542 static PetscEvent logkey_getlocalmat = 0; 3543 #undef __FUNCT__ 3544 #define __FUNCT__ "MatGetLocalMat" 3545 /*@C 3546 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3547 3548 Not Collective 3549 3550 Input Parameters: 3551 + A - the matrix 3552 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3553 3554 Output Parameter: 3555 . 
A_loc - the local sequential matrix generated 3556 3557 Level: developer 3558 3559 @*/ 3560 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3561 { 3562 PetscErrorCode ierr; 3563 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3564 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3565 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3566 PetscScalar *aa=a->a,*ba=b->a,*ca; 3567 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3568 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3569 3570 PetscFunctionBegin; 3571 if (!logkey_getlocalmat) { 3572 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3573 } 3574 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3575 if (scall == MAT_INITIAL_MATRIX){ 3576 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3577 ci[0] = 0; 3578 for (i=0; i<am; i++){ 3579 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3580 } 3581 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3582 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3583 k = 0; 3584 for (i=0; i<am; i++) { 3585 ncols_o = bi[i+1] - bi[i]; 3586 ncols_d = ai[i+1] - ai[i]; 3587 /* off-diagonal portion of A */ 3588 for (jo=0; jo<ncols_o; jo++) { 3589 col = cmap[*bj]; 3590 if (col >= cstart) break; 3591 cj[k] = col; bj++; 3592 ca[k++] = *ba++; 3593 } 3594 /* diagonal portion of A */ 3595 for (j=0; j<ncols_d; j++) { 3596 cj[k] = cstart + *aj++; 3597 ca[k++] = *aa++; 3598 } 3599 /* off-diagonal portion of A */ 3600 for (j=jo; j<ncols_o; j++) { 3601 cj[k] = cmap[*bj++]; 3602 ca[k++] = *ba++; 3603 } 3604 } 3605 /* put together the new matrix */ 3606 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3607 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3608 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3609 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3610 mat->free_a = PETSC_TRUE; 3611 mat->free_ij = PETSC_TRUE; 3612 mat->nonew = 0; 3613 } else if (scall == MAT_REUSE_MATRIX){ 3614 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3615 ci = mat->i; cj = mat->j; ca = mat->a; 3616 for (i=0; i<am; i++) { 3617 /* off-diagonal portion of A */ 3618 ncols_o = bi[i+1] - bi[i]; 3619 for (jo=0; jo<ncols_o; jo++) { 3620 col = cmap[*bj]; 3621 if (col >= cstart) break; 3622 *ca++ = *ba++; bj++; 3623 } 3624 /* diagonal portion of A */ 3625 ncols_d = ai[i+1] - ai[i]; 3626 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3627 /* off-diagonal portion of A */ 3628 for (j=jo; j<ncols_o; j++) { 3629 *ca++ = *ba++; bj++; 3630 } 3631 } 3632 } else { 3633 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3634 } 3635 3636 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3637 PetscFunctionReturn(0); 3638 } 3639 3640 static PetscEvent logkey_getlocalmatcondensed = 0; 3641 #undef __FUNCT__ 3642 #define __FUNCT__ "MatGetLocalMatCondensed" 3643 /*@C 3644 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3645 3646 Not Collective 3647 3648 Input Parameters: 3649 + A - the matrix 3650 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3651 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3652 3653 Output Parameter: 3654 . 
A_loc - the local sequential matrix generated 3655 3656 Level: developer 3657 3658 @*/ 3659 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3660 { 3661 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3662 PetscErrorCode ierr; 3663 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3664 IS isrowa,iscola; 3665 Mat *aloc; 3666 3667 PetscFunctionBegin; 3668 if (!logkey_getlocalmatcondensed) { 3669 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3670 } 3671 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3672 if (!row){ 3673 start = A->rmap.rstart; end = A->rmap.rend; 3674 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3675 } else { 3676 isrowa = *row; 3677 } 3678 if (!col){ 3679 start = A->cmap.rstart; 3680 cmap = a->garray; 3681 nzA = a->A->cmap.n; 3682 nzB = a->B->cmap.n; 3683 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3684 ncols = 0; 3685 for (i=0; i<nzB; i++) { 3686 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3687 else break; 3688 } 3689 imark = i; 3690 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3691 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3692 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3693 ierr = PetscFree(idx);CHKERRQ(ierr); 3694 } else { 3695 iscola = *col; 3696 } 3697 if (scall != MAT_INITIAL_MATRIX){ 3698 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3699 aloc[0] = *A_loc; 3700 } 3701 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3702 *A_loc = aloc[0]; 3703 ierr = PetscFree(aloc);CHKERRQ(ierr); 3704 if (!row){ 3705 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3706 } 3707 if (!col){ 3708 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3709 } 3710 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3711 PetscFunctionReturn(0); 3712 } 3713 3714 static PetscEvent logkey_GetBrowsOfAcols = 0; 3715 #undef __FUNCT__ 3716 #define __FUNCT__ "MatGetBrowsOfAcols" 3717 /*@C 3718 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3719 3720 Collective on Mat 3721 3722 Input Parameters: 3723 + A,B - the matrices in mpiaij format 3724 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3725 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3726 3727 Output Parameter: 3728 + rowb, colb - index sets of rows and columns of B to extract 3729 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3730 - B_seq - the sequential matrix generated 3731 3732 Level: developer 3733 3734 @*/ 3735 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3736 { 3737 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3738 PetscErrorCode ierr; 3739 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3740 IS isrowb,iscolb; 3741 Mat *bseq; 3742 3743 PetscFunctionBegin; 3744 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3745 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3746 } 3747 if (!logkey_GetBrowsOfAcols) { 3748 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3749 } 3750 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3751 3752 if (scall == MAT_INITIAL_MATRIX){ 3753 start = A->cmap.rstart; 3754 cmap = a->garray; 3755 nzA = a->A->cmap.n; 3756 nzB = a->B->cmap.n; 3757 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3758 ncols = 0; 3759 for (i=0; i<nzB; i++) { /* row < local row index */ 3760 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3761 else break; 3762 } 3763 imark = i; 3764 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3765 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3766 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3767 ierr = PetscFree(idx);CHKERRQ(ierr); 3768 *brstart = imark; 3769 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3770 } else { 3771 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3772 isrowb = *rowb; iscolb = *colb; 3773 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3774 bseq[0] = *B_seq; 3775 } 3776 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3777 *B_seq = bseq[0]; 3778 ierr = PetscFree(bseq);CHKERRQ(ierr); 3779 if (!rowb){ 3780 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3781 } else { 3782 *rowb = isrowb; 3783 } 3784 if (!colb){ 3785 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3786 } else { 3787 *colb = iscolb; 3788 } 3789 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3790 PetscFunctionReturn(0); 3791 } 3792 3793 static PetscEvent logkey_GetBrowsOfAocols = 0; 3794 #undef __FUNCT__ 3795 #define __FUNCT__ "MatGetBrowsOfAoCols" 3796 /*@C 3797 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3798 of the OFF-DIAGONAL portion of local A 3799 3800 Collective on Mat 3801 3802 Input Parameters: 3803 + A,B - the matrices in mpiaij format 3804 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3805 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3806 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3807 3808 Output Parameter: 3809 + B_oth - the sequential matrix generated 3810 3811 Level: developer 3812 3813 @*/ 3814 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3815 { 3816 VecScatter_MPI_General *gen_to,*gen_from; 3817 PetscErrorCode ierr; 3818 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3819 Mat_SeqAIJ *b_oth; 3820 VecScatter ctx=a->Mvctx; 3821 MPI_Comm comm=ctx->comm; 3822 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3823 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3824 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3825 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3826 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 3827 MPI_Status *sstatus,rstatus; 3828 PetscInt *cols,sbs,rbs; 3829 PetscScalar *vals; 3830 3831 PetscFunctionBegin; 3832 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3833 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3834 } 3835 if (!logkey_GetBrowsOfAocols) { 3836 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3837 } 3838 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3839 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3840 3841 gen_to = (VecScatter_MPI_General*)ctx->todata; 3842 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3843 rvalues = gen_from->values; /* holds the length of receiving row */ 3844 svalues = gen_to->values; /* holds the length of sending row */ 3845 nrecvs = gen_from->n; 3846 nsends = gen_to->n; 3847 3848 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3849 srow = gen_to->indices; /* local row index to be sent */ 3850 sstarts = gen_to->starts; 3851 sprocs = gen_to->procs; 3852 sstatus = gen_to->sstatus; 3853 sbs = gen_to->bs; 3854 rstarts = gen_from->starts; 3855 rprocs = gen_from->procs; 3856 rbs = gen_from->bs; 3857 3858 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3859 if (scall == MAT_INITIAL_MATRIX){ 3860 /* i-array */ 3861 /*---------*/ 3862 /* post receives */ 3863 for (i=0; i<nrecvs; i++){ 3864 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 3865 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 3866 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3867 } 3868 3869 /* pack the outgoing message */ 3870 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3871 rstartsj = sstartsj + nsends +1; 3872 sstartsj[0] = 0; rstartsj[0] = 0; 3873 len = 0; /* total length of j or a array to be sent */ 3874 k = 0; 3875 for (i=0; i<nsends; i++){ 3876 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 3877 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 3878 for (j=0; j<nrows; j++) { 3879 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3880 for (l=0; l<sbs; l++){ 3881 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3882 rowlen[j*sbs+l] = ncols; 3883 len += ncols; 3884 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3885 } 3886 k++; 3887 } 3888 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3889 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3890 } 3891 /* recvs and sends of i-array are completed */ 3892 i = nrecvs; 3893 while (i--) { 3894 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3895 } 3896 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3897 3898 /* allocate buffers for sending j and a arrays */ 3899 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3900 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3901 3902 /* create i-array of B_oth */ 3903 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3904 b_othi[0] = 0; 3905 len = 0; /* total length of j or a array to be received */ 3906 k = 0; 3907 for (i=0; i<nrecvs; i++){ 3908 rowlen = (PetscInt*)rvalues + rstarts[i]; 3909 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 3910 for (j=0; j<nrows; j++) { 3911 b_othi[k+1] = b_othi[k] + rowlen[j]; 3912 len += rowlen[j]; k++; 3913 } 3914 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3915 } 3916 3917 /* allocate space for j and a arrrays of B_oth */ 3918 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3919 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3920 3921 /* j-array */ 3922 /*---------*/ 3923 /* post receives of j-array */ 3924 for (i=0; i<nrecvs; i++){ 3925 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3926 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3927 } 3928 3929 /* pack the outgoing message j-array */ 3930 k = 0; 3931 for (i=0; i<nsends; i++){ 3932 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 3933 bufJ = bufj+sstartsj[i]; 3934 for (j=0; j<nrows; j++) { 3935 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3936 for (ll=0; ll<sbs; ll++){ 3937 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3938 for (l=0; l<ncols; l++){ 3939 *bufJ++ = cols[l]; 3940 } 3941 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3942 } 3943 } 3944 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3945 } 3946 3947 /* recvs and sends of j-array are completed */ 3948 i = nrecvs; 3949 while (i--) { 3950 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3951 } 3952 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3953 } else if (scall == MAT_REUSE_MATRIX){ 3954 sstartsj = *startsj; 3955 rstartsj = sstartsj + nsends +1; 3956 bufa = *bufa_ptr; 3957 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3958 b_otha = b_oth->a; 3959 } else { 3960 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3961 } 3962 3963 /* a-array */ 3964 /*---------*/ 3965 /* post receives of a-array */ 3966 for (i=0; i<nrecvs; i++){ 3967 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3968 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3969 } 3970 3971 /* pack the outgoing message a-array */ 3972 k = 0; 3973 for (i=0; i<nsends; i++){ 3974 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 3975 bufA = bufa+sstartsj[i]; 3976 for (j=0; j<nrows; j++) { 3977 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3978 for (ll=0; ll<sbs; ll++){ 3979 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3980 for (l=0; l<ncols; l++){ 3981 *bufA++ = vals[l]; 3982 } 3983 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3984 } 3985 } 3986 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3987 } 3988 /* recvs and sends of a-array are completed */ 3989 i = nrecvs; 3990 while (i--) { 3991 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3992 } 3993 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3994 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3995 3996 if (scall == MAT_INITIAL_MATRIX){ 3997 /* put together the new matrix */ 3998 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3999 4000 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4001 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4002 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4003 b_oth->free_a = PETSC_TRUE; 4004 b_oth->free_ij = PETSC_TRUE; 4005 b_oth->nonew = 0; 4006 4007 ierr = PetscFree(bufj);CHKERRQ(ierr); 4008 if (!startsj || !bufa_ptr){ 4009 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4010 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4011 } else { 4012 *startsj = sstartsj; 4013 *bufa_ptr = bufa; 4014 } 4015 } 4016 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4017 4018 PetscFunctionReturn(0); 4019 } 4020 4021 #undef __FUNCT__ 4022 #define __FUNCT__ "MatGetCommunicationStructs" 4023 /*@C 4024 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4025 4026 Not Collective 4027 4028 Input Parameters: 4029 . A - The matrix in mpiaij format 4030 4031 Output Parameter: 4032 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4033 . colmap - A map from global column index to local index into lvec 4034 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4035 4036 Level: developer 4037 4038 @*/ 4039 #if defined (PETSC_USE_CTABLE) 4040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4041 #else 4042 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4043 #endif 4044 { 4045 Mat_MPIAIJ *a; 4046 4047 PetscFunctionBegin; 4048 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4049 PetscValidPointer(lvec, 2) 4050 PetscValidPointer(colmap, 3) 4051 PetscValidPointer(multScatter, 4) 4052 a = (Mat_MPIAIJ *) A->data; 4053 if (lvec) *lvec = a->lvec; 4054 if (colmap) *colmap = a->colmap; 4055 if (multScatter) *multScatter = a->Mvctx; 4056 PetscFunctionReturn(0); 4057 } 4058 4059 EXTERN_C_BEGIN 4060 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4061 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4062 EXTERN_C_END 4063 4064 /*MC 4065 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4066 4067 Options Database Keys: 4068 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4069 4070 Level: beginner 4071 4072 .seealso: MatCreateMPIAIJ 4073 M*/ 4074 4075 EXTERN_C_BEGIN 4076 #undef __FUNCT__ 4077 #define __FUNCT__ "MatCreate_MPIAIJ" 4078 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4079 { 4080 Mat_MPIAIJ *b; 4081 PetscErrorCode ierr; 4082 PetscMPIInt size; 4083 4084 PetscFunctionBegin; 4085 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4086 4087 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4088 B->data = (void*)b; 4089 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4090 B->factor = 0; 4091 B->rmap.bs = 1; 4092 B->assembled = PETSC_FALSE; 4093 B->mapping = 0; 4094 4095 B->insertmode = NOT_SET_VALUES; 4096 b->size = size; 4097 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4098 4099 /* build cache for off array entries formed */ 4100 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4101 b->donotstash = PETSC_FALSE; 4102 b->colmap = 0; 4103 b->garray = 0; 4104 b->roworiented = PETSC_TRUE; 4105 4106 /* stuff used for matrix vector multiply */ 4107 b->lvec = PETSC_NULL; 4108 b->Mvctx = PETSC_NULL; 4109 4110 /* stuff for MatGetRow() */ 4111 b->rowindices = 0; 4112 b->rowvalues = 0; 4113 b->getrowactive = PETSC_FALSE; 4114 4115 4116 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4117 "MatStoreValues_MPIAIJ", 4118 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4120 "MatRetrieveValues_MPIAIJ", 4121 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4123 "MatGetDiagonalBlock_MPIAIJ", 4124 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4126 "MatIsTranspose_MPIAIJ", 4127 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4129 "MatMPIAIJSetPreallocation_MPIAIJ", 4130 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4132 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4133 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4135 "MatDiagonalScaleLocal_MPIAIJ", 4136 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4137 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4138 "MatConvert_MPIAIJ_MPICSRPERM", 4139 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4140 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4141 "MatConvert_MPIAIJ_MPICRL", 4142 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4143 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4144 PetscFunctionReturn(0); 4145 } 4146 EXTERN_C_END 4147 4148 #undef __FUNCT__ 4149 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4150 /*@C 4151 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4152 and "off-diagonal" part of the matrix in CSR format. 4153 4154 Collective on MPI_Comm 4155 4156 Input Parameters: 4157 + comm - MPI communicator 4158 . m - number of local rows (Cannot be PETSC_DECIDE) 4159 . n - This value should be the same as the local size used in creating the 4160 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4161 calculated if N is given) For square matrices n is almost always m. 4162 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4163 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4164 . i - row indices for "diagonal" portion of matrix 4165 . j - column indices 4166 . a - matrix values 4167 . oi - row indices for "off-diagonal" portion of matrix 4168 . oj - column indices 4169 - oa - matrix values 4170 4171 Output Parameter: 4172 . mat - the matrix 4173 4174 Level: advanced 4175 4176 Notes: 4177 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4178 4179 The i and j indices are 0 based 4180 4181 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4182 4183 4184 .keywords: matrix, aij, compressed row, sparse, parallel 4185 4186 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4187 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4188 @*/ 4189 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4190 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4191 { 4192 PetscErrorCode ierr; 4193 Mat_MPIAIJ *maij; 4194 4195 PetscFunctionBegin; 4196 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4197 if (i[0]) { 4198 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4199 } 4200 if (oi[0]) { 4201 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4202 } 4203 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4204 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4205 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4206 maij = (Mat_MPIAIJ*) (*mat)->data; 4207 maij->donotstash = PETSC_TRUE; 4208 (*mat)->preallocated = PETSC_TRUE; 4209 4210 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4211 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr); 4212 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr); 4213 4214 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4215 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4216 4217 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4218 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4219 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4220 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4221 4222 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4223 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4224 PetscFunctionReturn(0); 4225 } 4226 4227 /* 4228 Special version for direct calls from Fortran 4229 */ 4230 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4231 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4232 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4233 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4234 #endif 4235 4236 /* Change these macros so can be used in void function */ 4237 #undef CHKERRQ 4238 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4239 #undef SETERRQ2 4240 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4241 #undef SETERRQ 4242 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4243 4244 EXTERN_C_BEGIN 4245 #undef __FUNCT__ 4246 #define __FUNCT__ "matsetvaluesmpiaij_" 4247 
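/* Fortran stub: repeats the logic of MatSetValues_MPIAIJ() but returns void, so errors
   are reported through the CHKERRQ/SETERRQ macros redefined above to abort the run. */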
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4248 { 4249 Mat mat = *mmat; 4250 PetscInt m = *mm, n = *mn; 4251 InsertMode addv = *maddv; 4252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4253 PetscScalar value; 4254 PetscErrorCode ierr; 4255 4256 MatPreallocated(mat); 4257 if (mat->insertmode == NOT_SET_VALUES) { 4258 mat->insertmode = addv; 4259 } 4260 #if defined(PETSC_USE_DEBUG) 4261 else if (mat->insertmode != addv) { 4262 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4263 } 4264 #endif 4265 { 4266 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4267 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4268 PetscTruth roworiented = aij->roworiented; 4269 4270 /* Some Variables required in the macro */ 4271 Mat A = aij->A; 4272 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4273 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4274 PetscScalar *aa = a->a; 4275 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4276 Mat B = aij->B; 4277 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4278 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4279 PetscScalar *ba = b->a; 4280 4281 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4282 PetscInt nonew = a->nonew; 4283 PetscScalar *ap1,*ap2; 4284 4285 PetscFunctionBegin; 4286 for (i=0; i<m; i++) { 4287 if (im[i] < 0) continue; 4288 #if defined(PETSC_USE_DEBUG) 4289 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4290 #endif 4291 if (im[i] >= rstart && im[i] < rend) { 4292 row = im[i] - rstart; 4293 lastcol1 = -1; 4294 rp1 = aj + ai[row]; 4295 ap1 = aa + ai[row]; 4296 rmax1 = aimax[row]; 4297 nrow1 = ailen[row]; 4298 low1 = 0; 4299 high1 = nrow1; 4300 lastcol2 = -1; 4301 rp2 = bj + bi[row]; 4302 ap2 = ba + bi[row]; 4303 rmax2 = bimax[row]; 4304 nrow2 = bilen[row]; 4305 low2 = 0; 4306 high2 = nrow2; 4307 4308 for (j=0; j<n; j++) { 4309 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4310 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4311 if (in[j] >= cstart && in[j] < cend){ 4312 col = in[j] - cstart; 4313 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4314 } else if (in[j] < 0) continue; 4315 #if defined(PETSC_USE_DEBUG) 4316 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4317 #endif 4318 else { 4319 if (mat->was_assembled) { 4320 if (!aij->colmap) { 4321 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4322 } 4323 #if defined (PETSC_USE_CTABLE) 4324 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4325 col--; 4326 #else 4327 col = aij->colmap[in[j]] - 1; 4328 #endif 4329 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4330 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4331 col = in[j]; 4332 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4333 B = aij->B; 4334 b = (Mat_SeqAIJ*)B->data; 4335 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4336 rp2 = bj + bi[row]; 4337 ap2 = ba + bi[row]; 4338 rmax2 = bimax[row]; 4339 nrow2 = bilen[row]; 4340 low2 = 0; 4341 high2 = nrow2; 4342 bm = aij->B->rmap.n; 4343 ba = b->a; 4344 } 4345 } else col = in[j]; 4346 
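/* insert the entry into the off-diagonal block B with the (possibly remapped) column index */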
MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4347 } 4348 } 4349 } else { 4350 if (!aij->donotstash) { 4351 if (roworiented) { 4352 if (ignorezeroentries && v[i*n] == 0.0) continue; 4353 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4354 } else { 4355 if (ignorezeroentries && v[i] == 0.0) continue; 4356 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4357 } 4358 } 4359 } 4360 }} 4361 PetscFunctionReturnVoid(); 4362 } 4363 EXTERN_C_END 4364 4365
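/*
   Usage sketch (illustration only, not part of the library source): extract all locally
   owned rows of an assembled MATMPIAIJ matrix A into a sequential matrix with
   MatGetLocalMat(), refresh it after the values of A change, and destroy it when done.
   The names A, A_loc and ierr are placeholders.

       Mat A_loc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
       (work with the SeqAIJ matrix A_loc)
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/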