#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
   (at a slightly higher hash-table lookup cost); without it each processor
   keeps an order-N integer array, which is not scalable but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt
row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt
nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero 
the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = 
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the processor with the most nonzeros */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr =
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 851 for (i=1; i<size; i++) { 852 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 854 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 } else { 858 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(column_values);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 868 { 869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 870 PetscErrorCode ierr; 871 PetscMPIInt rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,iascii,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (iascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 PetscTruth inodes; 885 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 889 if (!inodes) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 891 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 894 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscInt inodecount,inodelimit,*inodes; 905 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 906 if (inodes) { 907 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 908 } else { 909 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 910 } 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 913 PetscFunctionReturn(0); 914 } 915 } else if (isbinary) { 916 if (size == 1) { 917 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 921 } 922 PetscFunctionReturn(0); 923 } else if (isdraw) { 924 PetscDraw draw; 925 PetscTruth isnull; 926 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 927 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 928 } 929 930 if (size == 1) { 931 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 932 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 933 } else { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqAIJ *Aloc; 937 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 938 PetscScalar *a; 939 940 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 941 if (!rank) { 942 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 943 } else { 944 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = mat->rmap.rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = mat->rmap.rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
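  /*
     The four flags computed above select the viewer back-end: ASCII, draw, binary and
     socket viewers all share MatView_MPIAIJ_ASCIIorDraworSocket(); any other viewer
     type is rejected below.

     Sketch of a typical caller (illustrative only, not part of this file; assumes the
     standard PETSc viewer API):

        Mat         A;                  -- an assembled MATMPIAIJ matrix
        PetscViewer viewer;
        ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"A.out",&viewer);CHKERRQ(ierr);
        ierr = MatView(A,viewer);CHKERRQ(ierr);    -- ends up in MatView_MPIAIJ()
        ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  */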
1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode 
ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == 
MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NO_NEW_NONZERO_LOCATIONS: 1207 case MAT_YES_NEW_NONZERO_LOCATIONS: 1208 case MAT_COLUMNS_UNSORTED: 1209 case MAT_COLUMNS_SORTED: 1210 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1211 case MAT_KEEP_ZEROED_ROWS: 1212 case MAT_NEW_NONZERO_LOCATION_ERR: 1213 case MAT_USE_INODES: 1214 case MAT_DO_NOT_USE_INODES: 1215 case MAT_IGNORE_ZERO_ENTRIES: 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1218 break; 1219 case MAT_ROW_ORIENTED: 1220 a->roworiented = PETSC_TRUE; 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROWS_SORTED: 1225 case MAT_ROWS_UNSORTED: 1226 case MAT_YES_NEW_DIAGONALS: 1227 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1228 break; 1229 case MAT_COLUMN_ORIENTED: 1230 a->roworiented = PETSC_FALSE; 1231 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1232 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1233 break; 1234 case MAT_IGNORE_OFF_PROC_ENTRIES: 1235 a->donotstash = PETSC_TRUE; 1236 break; 1237 case MAT_NO_NEW_DIAGONALS: 1238 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1239 case MAT_SYMMETRIC: 1240 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1241 break; 1242 case MAT_STRUCTURALLY_SYMMETRIC: 1243 case MAT_HERMITIAN: 1244 case MAT_SYMMETRY_ETERNAL: 1245 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_NOT_SYMMETRIC: 1248 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1249 case MAT_NOT_HERMITIAN: 1250 case MAT_NOT_SYMMETRY_ETERNAL: 1251 break; 1252 default: 1253 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIAIJ" 1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1266 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1267 PetscInt *cmap,*idx_p; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 1275 allocate enough space to hold information from the longest row. 
1276 */ 1277 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1278 PetscInt max = 1,tmp; 1279 for (i=0; i<matin->rmap.n; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1285 } 1286 1287 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1288 lrow = row - rstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]]; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!aij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1348 } 1349 aij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatNorm_MPIAIJ" 1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1356 { 1357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1358 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1359 PetscErrorCode ierr; 1360 PetscInt i,j,cstart = mat->cmap.rstart; 1361 PetscReal sum = 0.0; 1362 PetscScalar *v; 1363 1364 PetscFunctionBegin; 1365 if (aij->size == 1) { 1366 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1367 } else { 1368 if (type == NORM_FROBENIUS) { 1369 v = amat->a; 1370 for (i=0; i<amat->nz; i++) { 1371 #if defined(PETSC_USE_COMPLEX) 1372 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1373 #else 1374 sum += (*v)*(*v); v++; 1375 #endif 1376 } 1377 v = bmat->a; 1378 for (i=0; i<bmat->nz; i++) { 1379 #if defined(PETSC_USE_COMPLEX) 1380 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1381 #else 1382 sum += (*v)*(*v); v++; 1383 #endif 1384 } 1385 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1386 *norm = sqrt(*norm); 
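      /* Each process has summed |a_ij|^2 over its diagonal (A) and off-diagonal (B)
         blocks; the MPI_Allreduce above adds the partial sums across the communicator,
         so *norm now holds the global Frobenius norm sqrt(sum_ij |a_ij|^2). */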
1387 } else if (type == NORM_1) { /* max column norm */ 1388 PetscReal *tmp,*tmp2; 1389 PetscInt *jj,*garray = aij->garray; 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1392 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1393 *norm = 0.0; 1394 v = amat->a; jj = amat->j; 1395 for (j=0; j<amat->nz; j++) { 1396 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1397 } 1398 v = bmat->a; jj = bmat->j; 1399 for (j=0; j<bmat->nz; j++) { 1400 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1401 } 1402 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1403 for (j=0; j<mat->cmap.N; j++) { 1404 if (tmp2[j] > *norm) *norm = tmp2[j]; 1405 } 1406 ierr = PetscFree(tmp);CHKERRQ(ierr); 1407 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1408 } else if (type == NORM_INFINITY) { /* max row norm */ 1409 PetscReal ntemp = 0.0; 1410 for (j=0; j<aij->A->rmap.n; j++) { 1411 v = amat->a + amat->i[j]; 1412 sum = 0.0; 1413 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1414 sum += PetscAbsScalar(*v); v++; 1415 } 1416 v = bmat->a + bmat->i[j]; 1417 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1418 sum += PetscAbsScalar(*v); v++; 1419 } 1420 if (sum > ntemp) ntemp = sum; 1421 } 1422 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1423 } else { 1424 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1425 } 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatTranspose_MPIAIJ" 1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1433 { 1434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1435 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1436 PetscErrorCode ierr; 1437 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1438 Mat B; 1439 PetscScalar *array; 1440 1441 PetscFunctionBegin; 1442 if (!matout && M != N) { 1443 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1444 } 1445 1446 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1447 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1448 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1449 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1450 1451 /* copy over the A part */ 1452 Aloc = (Mat_SeqAIJ*)a->A->data; 1453 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1454 row = A->rmap.rstart; 1455 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1456 for (i=0; i<m; i++) { 1457 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1458 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1459 } 1460 aj = Aloc->j; 1461 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1462 1463 /* copy over the B part */ 1464 Aloc = (Mat_SeqAIJ*)a->B->data; 1465 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1466 row = A->rmap.rstart; 1467 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1468 ct = cols; 1469 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1470 for (i=0; i<m; i++) { 1471 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1472 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1473 } 1474 ierr = PetscFree(ct);CHKERRQ(ierr); 1475 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1476 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 if (matout) { 1478 *matout = B; 1479 } else { 1480 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1481 } 1482 
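  /* When matout is NULL the transpose was requested in place: MatHeaderCopy() above
     replaces the internals of A with those of the freshly assembled transpose B, so
     the caller's handle A now refers to the transposed matrix (square matrices only,
     as checked at the top of this routine). */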
PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1488 { 1489 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1490 Mat a = aij->A,b = aij->B; 1491 PetscErrorCode ierr; 1492 PetscInt s1,s2,s3; 1493 1494 PetscFunctionBegin; 1495 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1496 if (rr) { 1497 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1498 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1499 /* Overlap communication with computation. */ 1500 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1501 } 1502 if (ll) { 1503 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1504 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1505 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1506 } 1507 /* scale the diagonal block */ 1508 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1509 1510 if (rr) { 1511 /* Do a scatter end and then right scale the off-diagonal block */ 1512 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1513 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1514 } 1515 1516 PetscFunctionReturn(0); 1517 } 1518 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1522 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1523 { 1524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 if (!a->rank) { 1529 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1530 } 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1536 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1537 { 1538 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1543 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1544 PetscFunctionReturn(0); 1545 } 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1548 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1549 { 1550 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatEqual_MPIAIJ" 1560 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1561 { 1562 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1563 Mat a,b,c,d; 1564 PetscTruth flg; 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 a = matA->A; b = matA->B; 1569 c = matB->A; d = matB->B; 1570 1571 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1572 if (flg) { 1573 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1574 } 1575 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatCopy_MPIAIJ" 1581 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1582 { 1583 PetscErrorCode ierr; 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1585 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1586 1587 PetscFunctionBegin; 1588 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1589 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1590 /* because of the column compression in the off-processor part of the matrix a->B, 1591 the number of columns in a->B and b->B may be different, hence we cannot call 1592 the MatCopy() directly on the two parts. If need be, we can provide a more 1593 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1594 then copying the submatrices */ 1595 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1596 } else { 1597 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1598 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1605 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1606 { 1607 PetscErrorCode ierr; 1608 1609 PetscFunctionBegin; 1610 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #include "petscblaslapack.h" 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatAXPY_MPIAIJ" 1617 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1618 { 1619 PetscErrorCode ierr; 1620 PetscInt i; 1621 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1622 PetscBLASInt bnz,one=1; 1623 Mat_SeqAIJ *x,*y; 1624 1625 PetscFunctionBegin; 1626 if (str == SAME_NONZERO_PATTERN) { 1627 PetscScalar alpha = a; 1628 x = (Mat_SeqAIJ *)xx->A->data; 1629 y = (Mat_SeqAIJ *)yy->A->data; 1630 bnz = (PetscBLASInt)x->nz; 1631 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1632 x = (Mat_SeqAIJ *)xx->B->data; 1633 y = (Mat_SeqAIJ *)yy->B->data; 1634 bnz = (PetscBLASInt)x->nz; 1635 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1636 } else if (str == SUBSET_NONZERO_PATTERN) { 1637 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1638 1639 x = (Mat_SeqAIJ *)xx->B->data; 1640 y = (Mat_SeqAIJ *)yy->B->data; 1641 if (y->xtoy && y->XtoY != xx->B) { 1642 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1643 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1644 } 1645 if (!y->xtoy) { /* get xtoy */ 1646 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1647 y->XtoY = xx->B; 1648 } 1649 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1650 } else { 1651 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1652 } 1653 PetscFunctionReturn(0); 1654 } 1655 1656 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatConjugate_MPIAIJ" 1660 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1661 { 1662 #if defined(PETSC_USE_COMPLEX) 1663 PetscErrorCode ierr; 1664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1665 1666 PetscFunctionBegin; 1667 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1668 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1669 #else 1670 PetscFunctionBegin; 1671 #endif 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatRealPart_MPIAIJ" 1677 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1678 { 1679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1680 PetscErrorCode ierr; 1681 1682 PetscFunctionBegin; 1683 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1684 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1690 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1691 { 1692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1693 PetscErrorCode ierr; 1694 1695 
PetscFunctionBegin; 1696 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1697 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 /* -------------------------------------------------------------------*/ 1702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1703 MatGetRow_MPIAIJ, 1704 MatRestoreRow_MPIAIJ, 1705 MatMult_MPIAIJ, 1706 /* 4*/ MatMultAdd_MPIAIJ, 1707 MatMultTranspose_MPIAIJ, 1708 MatMultTransposeAdd_MPIAIJ, 1709 0, 1710 0, 1711 0, 1712 /*10*/ 0, 1713 0, 1714 0, 1715 MatRelax_MPIAIJ, 1716 MatTranspose_MPIAIJ, 1717 /*15*/ MatGetInfo_MPIAIJ, 1718 MatEqual_MPIAIJ, 1719 MatGetDiagonal_MPIAIJ, 1720 MatDiagonalScale_MPIAIJ, 1721 MatNorm_MPIAIJ, 1722 /*20*/ MatAssemblyBegin_MPIAIJ, 1723 MatAssemblyEnd_MPIAIJ, 1724 0, 1725 MatSetOption_MPIAIJ, 1726 MatZeroEntries_MPIAIJ, 1727 /*25*/ MatZeroRows_MPIAIJ, 1728 0, 1729 0, 1730 0, 1731 0, 1732 /*30*/ MatSetUpPreallocation_MPIAIJ, 1733 0, 1734 0, 1735 0, 1736 0, 1737 /*35*/ MatDuplicate_MPIAIJ, 1738 0, 1739 0, 1740 0, 1741 0, 1742 /*40*/ MatAXPY_MPIAIJ, 1743 MatGetSubMatrices_MPIAIJ, 1744 MatIncreaseOverlap_MPIAIJ, 1745 MatGetValues_MPIAIJ, 1746 MatCopy_MPIAIJ, 1747 /*45*/ MatPrintHelp_MPIAIJ, 1748 MatScale_MPIAIJ, 1749 0, 1750 0, 1751 0, 1752 /*50*/ MatSetBlockSize_MPIAIJ, 1753 0, 1754 0, 1755 0, 1756 0, 1757 /*55*/ MatFDColoringCreate_MPIAIJ, 1758 0, 1759 MatSetUnfactored_MPIAIJ, 1760 MatPermute_MPIAIJ, 1761 0, 1762 /*60*/ MatGetSubMatrix_MPIAIJ, 1763 MatDestroy_MPIAIJ, 1764 MatView_MPIAIJ, 1765 0, 1766 0, 1767 /*65*/ 0, 1768 0, 1769 0, 1770 0, 1771 0, 1772 /*70*/ 0, 1773 0, 1774 MatSetColoring_MPIAIJ, 1775 #if defined(PETSC_HAVE_ADIC) 1776 MatSetValuesAdic_MPIAIJ, 1777 #else 1778 0, 1779 #endif 1780 MatSetValuesAdifor_MPIAIJ, 1781 /*75*/ 0, 1782 0, 1783 0, 1784 0, 1785 0, 1786 /*80*/ 0, 1787 0, 1788 0, 1789 0, 1790 /*84*/ MatLoad_MPIAIJ, 1791 0, 1792 0, 1793 0, 1794 0, 1795 0, 1796 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1797 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1798 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1799 MatPtAP_Basic, 1800 MatPtAPSymbolic_MPIAIJ, 1801 /*95*/ MatPtAPNumeric_MPIAIJ, 1802 0, 1803 0, 1804 0, 1805 0, 1806 /*100*/0, 1807 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1808 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1809 MatConjugate_MPIAIJ, 1810 0, 1811 /*105*/MatSetValuesRow_MPIAIJ, 1812 MatRealPart_MPIAIJ, 1813 MatImaginaryPart_MPIAIJ}; 1814 1815 /* ----------------------------------------------------------------------------------------*/ 1816 1817 EXTERN_C_BEGIN 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1820 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1821 { 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 PetscErrorCode ierr; 1824 1825 PetscFunctionBegin; 1826 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1827 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 EXTERN_C_END 1831 1832 EXTERN_C_BEGIN 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1835 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1842 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 EXTERN_C_END 1846 1847 #include "petscpc.h" 1848 EXTERN_C_BEGIN 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const 
PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1852 { 1853 Mat_MPIAIJ *b; 1854 PetscErrorCode ierr; 1855 PetscInt i; 1856 1857 PetscFunctionBegin; 1858 B->preallocated = PETSC_TRUE; 1859 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1860 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1861 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1862 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1863 1864 B->rmap.bs = B->cmap.bs = 1; 1865 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1866 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1867 if (d_nnz) { 1868 for (i=0; i<B->rmap.n; i++) { 1869 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1870 } 1871 } 1872 if (o_nnz) { 1873 for (i=0; i<B->rmap.n; i++) { 1874 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1875 } 1876 } 1877 b = (Mat_MPIAIJ*)B->data; 1878 1879 /* Explicitly create 2 MATSEQAIJ matrices. */ 1880 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1881 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1882 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1883 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1884 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1885 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1886 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1887 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1888 1889 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1890 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1891 1892 PetscFunctionReturn(0); 1893 } 1894 EXTERN_C_END 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1899 { 1900 Mat mat; 1901 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 *newmat = 0; 1906 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1907 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1908 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1909 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1910 a = (Mat_MPIAIJ*)mat->data; 1911 1912 mat->factor = matin->factor; 1913 mat->rmap.bs = matin->rmap.bs; 1914 mat->assembled = PETSC_TRUE; 1915 mat->insertmode = NOT_SET_VALUES; 1916 mat->preallocated = PETSC_TRUE; 1917 1918 a->size = oldmat->size; 1919 a->rank = oldmat->rank; 1920 a->donotstash = oldmat->donotstash; 1921 a->roworiented = oldmat->roworiented; 1922 a->rowindices = 0; 1923 a->rowvalues = 0; 1924 a->getrowactive = PETSC_FALSE; 1925 1926 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1927 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1928 1929 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1930 if (oldmat->colmap) { 1931 #if defined (PETSC_USE_CTABLE) 1932 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1933 #else 1934 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1935 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1936 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1937 #endif 1938 } else a->colmap = 0; 1939 if (oldmat->garray) { 1940 PetscInt len; 1941 len = oldmat->B->cmap.n; 1942 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1944 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1945 } else a->garray = 0; 1946 1947 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1948 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1949 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1950 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1951 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1952 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1953 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1954 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1955 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1956 *newmat = mat; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "petscsys.h" 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLoad_MPIAIJ" 1964 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1965 { 1966 Mat A; 1967 PetscScalar *vals,*svals; 1968 MPI_Comm comm = ((PetscObject)viewer)->comm; 1969 MPI_Status status; 1970 PetscErrorCode ierr; 1971 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1972 PetscInt i,nz,j,rstart,rend,mmax; 1973 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1974 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1975 PetscInt cend,cstart,n,*rowners; 1976 int fd; 1977 1978 PetscFunctionBegin; 1979 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1980 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1981 if (!rank) { 1982 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1983 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1984 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1985 } 1986 1987 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1988 M = header[1]; N = header[2]; 1989 /* determine ownership of all rows */ 1990 m = M/size + ((M % size) > rank); 1991 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1992 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1993 1994 /* First process needs enough room for process with most rows */ 1995 if (!rank) { 1996 mmax = rowners[1]; 1997 for (i=2; i<size; i++) { 1998 mmax = PetscMax(mmax,rowners[i]); 1999 } 2000 } else mmax = m; 2001 2002 rowners[0] = 0; 2003 for (i=2; i<=size; i++) { 2004 mmax = PetscMax(mmax,rowners[i]); 2005 rowners[i] += rowners[i-1]; 2006 } 2007 rstart = rowners[rank]; 2008 rend = rowners[rank+1]; 2009 2010 /* distribute row lengths to all processors */ 2011 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2012 if (!rank) { 2013 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2014 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2015 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2016 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2017 for (j=0; j<m; j++) { 2018 procsnz[0] += ourlens[j]; 2019 } 2020 for (i=1; i<size; i++) { 2021 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2022 /* calculate the number of nonzeros on each 
processor */ 2023 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2024 procsnz[i] += rowlengths[j]; 2025 } 2026 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2027 } 2028 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2029 } else { 2030 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2031 } 2032 2033 if (!rank) { 2034 /* determine max buffer needed and allocate it */ 2035 maxnz = 0; 2036 for (i=0; i<size; i++) { 2037 maxnz = PetscMax(maxnz,procsnz[i]); 2038 } 2039 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2040 2041 /* read in my part of the matrix column indices */ 2042 nz = procsnz[0]; 2043 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2044 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2045 2046 /* read in every one elses and ship off */ 2047 for (i=1; i<size; i++) { 2048 nz = procsnz[i]; 2049 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2050 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2051 } 2052 ierr = PetscFree(cols);CHKERRQ(ierr); 2053 } else { 2054 /* determine buffer space needed for message */ 2055 nz = 0; 2056 for (i=0; i<m; i++) { 2057 nz += ourlens[i]; 2058 } 2059 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2060 2061 /* receive message of column indices*/ 2062 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2063 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2064 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2065 } 2066 2067 /* determine column ownership if matrix is not square */ 2068 if (N != M) { 2069 n = N/size + ((N % size) > rank); 2070 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2071 cstart = cend - n; 2072 } else { 2073 cstart = rstart; 2074 cend = rend; 2075 n = cend - cstart; 2076 } 2077 2078 /* loop over local rows, determining number of off diagonal entries */ 2079 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2080 jj = 0; 2081 for (i=0; i<m; i++) { 2082 for (j=0; j<ourlens[i]; j++) { 2083 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2084 jj++; 2085 } 2086 } 2087 2088 /* create our matrix */ 2089 for (i=0; i<m; i++) { 2090 ourlens[i] -= offlens[i]; 2091 } 2092 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2093 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2094 ierr = MatSetType(A,type);CHKERRQ(ierr); 2095 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2096 2097 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2098 for (i=0; i<m; i++) { 2099 ourlens[i] += offlens[i]; 2100 } 2101 2102 if (!rank) { 2103 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2104 2105 /* read in my part of the matrix numerical values */ 2106 nz = procsnz[0]; 2107 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2108 2109 /* insert into matrix */ 2110 jj = rstart; 2111 smycols = mycols; 2112 svals = vals; 2113 for (i=0; i<m; i++) { 2114 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2115 smycols += ourlens[i]; 2116 svals += ourlens[i]; 2117 jj++; 2118 } 2119 2120 /* read in other processors and ship out */ 2121 for (i=1; i<size; i++) { 2122 nz = procsnz[i]; 2123 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2124 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2125 } 2126 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2127 } else { 2128 /* receive numeric values */ 2129 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2130 2131 /* receive message of values*/ 2132 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2133 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2134 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2135 2136 /* insert into matrix */ 2137 jj = rstart; 2138 smycols = mycols; 2139 svals = vals; 2140 for (i=0; i<m; i++) { 2141 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2142 smycols += ourlens[i]; 2143 svals += ourlens[i]; 2144 jj++; 2145 } 2146 } 2147 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2148 ierr = PetscFree(vals);CHKERRQ(ierr); 2149 ierr = PetscFree(mycols);CHKERRQ(ierr); 2150 ierr = PetscFree(rowners);CHKERRQ(ierr); 2151 2152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 *newmat = A; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2160 /* 2161 Not great since it makes two copies of the submatrix, first an SeqAIJ 2162 in local and then by concatenating the local matrices the end result. 2163 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2164 */ 2165 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2166 { 2167 PetscErrorCode ierr; 2168 PetscMPIInt rank,size; 2169 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2170 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2171 Mat *local,M,Mreuse; 2172 PetscScalar *vwork,*aa; 2173 MPI_Comm comm = mat->comm; 2174 Mat_SeqAIJ *aij; 2175 2176 2177 PetscFunctionBegin; 2178 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2180 2181 if (call == MAT_REUSE_MATRIX) { 2182 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2183 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2184 local = &Mreuse; 2185 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2186 } else { 2187 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2188 Mreuse = *local; 2189 ierr = PetscFree(local);CHKERRQ(ierr); 2190 } 2191 2192 /* 2193 m - number of local rows 2194 n - number of columns (same on all processors) 2195 rstart - first row in new global matrix generated 2196 */ 2197 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2198 if (call == MAT_INITIAL_MATRIX) { 2199 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2200 ii = aij->i; 2201 jj = aij->j; 2202 2203 /* 2204 Determine the number of non-zeros in the diagonal and off-diagonal 2205 portions of the matrix in order to do correct preallocation 2206 */ 2207 2208 /* first get start and end of "diagonal" columns */ 2209 if (csize == PETSC_DECIDE) { 2210 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2211 if (mglobal == n) { /* square matrix */ 2212 nlocal = m; 2213 } else { 2214 nlocal = n/size + ((n % size) > rank); 2215 } 2216 } else { 2217 nlocal = csize; 2218 } 2219 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2220 rstart = rend - nlocal; 2221 if (rank == size - 1 && rend != n) { 2222 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2223 } 2224 2225 /* next, compute all the lengths */ 2226 ierr = 
PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2227 olens = dlens + m; 2228 for (i=0; i<m; i++) { 2229 jend = ii[i+1] - ii[i]; 2230 olen = 0; 2231 dlen = 0; 2232 for (j=0; j<jend; j++) { 2233 if (*jj < rstart || *jj >= rend) olen++; 2234 else dlen++; 2235 jj++; 2236 } 2237 olens[i] = olen; 2238 dlens[i] = dlen; 2239 } 2240 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2241 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2242 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2243 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2244 ierr = PetscFree(dlens);CHKERRQ(ierr); 2245 } else { 2246 PetscInt ml,nl; 2247 2248 M = *newmat; 2249 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2250 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2251 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2252 /* 2253 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2254 rather than the slower MatSetValues(). 2255 */ 2256 M->was_assembled = PETSC_TRUE; 2257 M->assembled = PETSC_FALSE; 2258 } 2259 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2260 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2261 ii = aij->i; 2262 jj = aij->j; 2263 aa = aij->a; 2264 for (i=0; i<m; i++) { 2265 row = rstart + i; 2266 nz = ii[i+1] - ii[i]; 2267 cwork = jj; jj += nz; 2268 vwork = aa; aa += nz; 2269 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2270 } 2271 2272 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 *newmat = M; 2275 2276 /* save submatrix used in processor for next request */ 2277 if (call == MAT_INITIAL_MATRIX) { 2278 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2279 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2280 } 2281 2282 PetscFunctionReturn(0); 2283 } 2284 2285 EXTERN_C_BEGIN 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2288 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2289 { 2290 PetscInt m,cstart, cend,j,nnz,i,d; 2291 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2292 const PetscInt *JJ; 2293 PetscScalar *values; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2298 2299 B->rmap.bs = B->cmap.bs = 1; 2300 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2301 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2302 m = B->rmap.n; 2303 cstart = B->cmap.rstart; 2304 cend = B->cmap.rend; 2305 rstart = B->rmap.rstart; 2306 2307 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2308 o_nnz = d_nnz + m; 2309 2310 for (i=0; i<m; i++) { 2311 nnz = I[i+1]- I[i]; 2312 JJ = J + I[i]; 2313 nnz_max = PetscMax(nnz_max,nnz); 2314 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2315 for (j=0; j<nnz; j++) { 2316 if (*JJ >= cstart) break; 2317 JJ++; 2318 } 2319 d = 0; 2320 for (; j<nnz; j++) { 2321 if (*JJ++ >= cend) break; 2322 d++; 2323 } 2324 d_nnz[i] = d; 2325 o_nnz[i] = nnz - d; 2326 } 2327 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2328 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2329 2330 if (v) values = (PetscScalar*)v; 2331 else { 2332 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 
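    /* no numerical values were supplied: fall back to a zeroed scratch buffer of length nnz_max
       so that the nonzero pattern of every row can still be inserted below with explicit zeros */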
2333 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2334 } 2335 2336 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2337 for (i=0; i<m; i++) { 2338 ii = i + rstart; 2339 nnz = I[i+1]- I[i]; 2340 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2341 } 2342 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2345 2346 if (!v) { 2347 ierr = PetscFree(values);CHKERRQ(ierr); 2348 } 2349 PetscFunctionReturn(0); 2350 } 2351 EXTERN_C_END 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2355 /*@ 2356 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2357 (the default parallel PETSc format). 2358 2359 Collective on MPI_Comm 2360 2361 Input Parameters: 2362 + B - the matrix 2363 . i - the indices into j for the start of each local row (starts with zero) 2364 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2365 - v - optional values in the matrix 2366 2367 Level: developer 2368 2369 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2370 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2371 2372 .keywords: matrix, aij, compressed row, sparse, parallel 2373 2374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2375 MatCreateSeqAIJWithArrays() 2376 @*/ 2377 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2378 { 2379 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2380 2381 PetscFunctionBegin; 2382 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2383 if (f) { 2384 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2385 } 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2391 /*@C 2392 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2393 (the default parallel PETSc format). For good matrix assembly performance 2394 the user should preallocate the matrix storage by setting the parameters 2395 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2396 performance can be increased by more than a factor of 50. 2397 2398 Collective on MPI_Comm 2399 2400 Input Parameters: 2401 + A - the matrix 2402 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2403 (same value is used for all local rows) 2404 . d_nnz - array containing the number of nonzeros in the various rows of the 2405 DIAGONAL portion of the local submatrix (possibly different for each row) 2406 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2407 The size of this array is equal to the number of local rows, i.e 'm'. 2408 You must leave room for the diagonal entry even if it is zero. 2409 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2410 submatrix (same value is used for all local rows). 
2411 - o_nnz - array containing the number of nonzeros in the various rows of the 2412 OFF-DIAGONAL portion of the local submatrix (possibly different for 2413 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2414 structure. The size of this array is equal to the number 2415 of local rows, i.e. 'm'. 2416 2417 If the *_nnz parameter is given then the *_nz parameter is ignored. 2418 2419 The AIJ format (also called the Yale sparse matrix format or 2420 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2421 storage. The stored row and column indices begin with zero. See the users manual for details. 2422 2423 The parallel matrix is partitioned such that the first m0 rows belong to 2424 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2425 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2426 2427 The DIAGONAL portion of the local submatrix of a processor can be defined 2428 as the submatrix which is obtained by extracting the part corresponding 2429 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2430 first row that belongs to the processor, and r2 is the last row belonging 2431 to this processor. This is a square mxm matrix. The remaining portion 2432 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2433 2434 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2435 2436 Example usage: 2437 2438 Consider the following 8x8 matrix with 34 non-zero values, that is 2439 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2440 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2441 as follows: 2442 2443 .vb 2444 1 2 0 | 0 3 0 | 0 4 2445 Proc0 0 5 6 | 7 0 0 | 8 0 2446 9 0 10 | 11 0 0 | 12 0 2447 ------------------------------------- 2448 13 0 14 | 15 16 17 | 0 0 2449 Proc1 0 18 0 | 19 20 21 | 0 0 2450 0 0 0 | 22 23 0 | 24 0 2451 ------------------------------------- 2452 Proc2 25 26 27 | 0 0 28 | 29 0 2453 30 0 0 | 31 32 33 | 0 34 2454 .ve 2455 2456 This can be represented as a collection of submatrices as: 2457 2458 .vb 2459 A B C 2460 D E F 2461 G H I 2462 .ve 2463 2464 Where the submatrices A,B,C are owned by proc0, D,E,F are 2465 owned by proc1, G,H,I are owned by proc2. 2466 2467 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2468 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2469 The 'M','N' parameters are 8,8, and have the same values on all procs. 2470 2471 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2472 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2473 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2474 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2475 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2476 matrix, and [DF] as another SeqAIJ matrix. 2477 2478 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2479 allocated for every row of the local diagonal submatrix, and o_nz 2480 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2481 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2482 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2483 In this case, the values of d_nz,o_nz are: 2484 .vb 2485 proc0 : d_nz = 2, o_nz = 2 2486 proc1 : d_nz = 3, o_nz = 2 2487 proc2 : d_nz = 1, o_nz = 4 2488 .ve 2489 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2490 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2491 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2492 34 values. 2493 2494 When the d_nnz, o_nnz parameters are specified, the storage is specified 2495 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2496 In the above case the values for d_nnz,o_nnz are: 2497 .vb 2498 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2499 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2500 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2501 .ve 2502 Here the space allocated is the sum of all the above values, i.e. 34, and 2503 hence the preallocation is perfect. 2504 2505 Level: intermediate 2506 2507 .keywords: matrix, aij, compressed row, sparse, parallel 2508 2509 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2510 MPIAIJ 2511 @*/ 2512 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2513 { 2514 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2515 2516 PetscFunctionBegin; 2517 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2518 if (f) { 2519 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2526 /*@C 2527 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 2528 CSR format. 2529 2530 Collective on MPI_Comm 2531 2532 Input Parameters: 2533 + comm - MPI communicator 2534 . m - number of local rows (Cannot be PETSC_DECIDE) 2535 . n - This value should be the same as the local size used in creating the 2536 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2537 calculated if N is given) For square matrices n is almost always m. 2538 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2539 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2540 . i - row indices 2541 . j - column indices 2542 - a - matrix values 2543 2544 Output Parameter: 2545 . mat - the matrix 2546 Level: intermediate 2547 2548 Notes: 2549 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2550 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2551 called this routine.
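
       As a minimal calling sketch (ia, ja, and va are placeholder names for a 0-based CSR triple
       the caller has already built for its local rows; error checking omitted):
.vb
      Mat A;
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);
.ve
       Because the arrays are copied, ia, ja, and va may be freed or reused once the call returns.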
2552 2553 The i and j indices are 0 based. 2554 2555 .keywords: matrix, aij, compressed row, sparse, parallel 2556 2557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2558 MPIAIJ, MatCreateMPIAIJ() 2559 @*/ 2560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2561 { 2562 PetscErrorCode ierr; 2563 2564 PetscFunctionBegin; 2565 if (i[0]) { 2566 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2567 } 2568 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2569 ierr = MatCreate(comm,mat);CHKERRQ(ierr); ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2570 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2571 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 #undef __FUNCT__ 2576 #define __FUNCT__ "MatCreateMPIAIJ" 2577 /*@C 2578 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2579 (the default parallel PETSc format). For good matrix assembly performance 2580 the user should preallocate the matrix storage by setting the parameters 2581 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2582 performance can be increased by more than a factor of 50. 2583 2584 Collective on MPI_Comm 2585 2586 Input Parameters: 2587 + comm - MPI communicator 2588 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2589 This value should be the same as the local size used in creating the 2590 y vector for the matrix-vector product y = Ax. 2591 . n - This value should be the same as the local size used in creating the 2592 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2593 calculated if N is given) For square matrices n is almost always m. 2594 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2595 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2596 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2597 (same value is used for all local rows) 2598 . d_nnz - array containing the number of nonzeros in the various rows of the 2599 DIAGONAL portion of the local submatrix (possibly different for each row) 2600 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2601 The size of this array is equal to the number of local rows, i.e. 'm'. 2602 You must leave room for the diagonal entry even if it is zero. 2603 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2604 submatrix (same value is used for all local rows). 2605 - o_nnz - array containing the number of nonzeros in the various rows of the 2606 OFF-DIAGONAL portion of the local submatrix (possibly different for 2607 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2608 structure. The size of this array is equal to the number 2609 of local rows, i.e. 'm'. 2610 2611 Output Parameter: 2612 . A - the matrix 2613 2614 Notes: 2615 If the *_nnz parameter is given then the *_nz parameter is ignored. 2616 2617 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2618 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2619 storage requirements for this matrix.
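
   For instance, a rough calling sketch with constant per-row preallocation (the counts 5 and 2
   are illustrative guesses only, not values taken from a real problem):
.vb
      Mat A;
      MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve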
2620 2621 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2622 processor then it must be used on all processors that share the object for 2623 that argument. 2624 2625 The user MUST specify either the local or global matrix dimensions 2626 (possibly both). 2627 2628 The parallel matrix is partitioned such that the first m0 rows belong to 2629 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2630 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2631 2632 The DIAGONAL portion of the local submatrix of a processor can be defined 2633 as the submatrix which is obtained by extracting the part corresponding 2634 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2635 first row that belongs to the processor, and r2 is the last row belonging 2636 to this processor. This is a square mxm matrix. The remaining portion 2637 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2638 2639 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2640 2641 When calling this routine with a single process communicator, a matrix of 2642 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2643 type of communicator, use the construction mechanism: 2644 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2645 2646 By default, this format uses inodes (identical nodes) when possible. 2647 We search for consecutive rows with the same nonzero structure, thereby 2648 reusing matrix information to achieve increased efficiency. 2649 2650 Options Database Keys: 2651 + -mat_no_inode - Do not use inodes 2652 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2653 - -mat_aij_oneindex - Internally use indexing starting at 1 2654 rather than 0. Note that when calling MatSetValues(), 2655 the user still MUST index entries starting at 0! 2656 2657 2658 Example usage: 2659 2660 Consider the following 8x8 matrix with 34 non-zero values, that is 2661 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2662 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2663 as follows: 2664 2665 .vb 2666 1 2 0 | 0 3 0 | 0 4 2667 Proc0 0 5 6 | 7 0 0 | 8 0 2668 9 0 10 | 11 0 0 | 12 0 2669 ------------------------------------- 2670 13 0 14 | 15 16 17 | 0 0 2671 Proc1 0 18 0 | 19 20 21 | 0 0 2672 0 0 0 | 22 23 0 | 24 0 2673 ------------------------------------- 2674 Proc2 25 26 27 | 0 0 28 | 29 0 2675 30 0 0 | 31 32 33 | 0 34 2676 .ve 2677 2678 This can be represented as a collection of submatrices as: 2679 2680 .vb 2681 A B C 2682 D E F 2683 G H I 2684 .ve 2685 2686 Where the submatrices A,B,C are owned by proc0, D,E,F are 2687 owned by proc1, G,H,I are owned by proc2. 2688 2689 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2690 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2691 The 'M','N' parameters are 8,8, and have the same values on all procs. 2692 2693 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2694 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2695 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2696 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2697 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2698 matrix, and [DF] as another SeqAIJ matrix.
2699 2700 When d_nz, o_nz parameters are specified, d_nz storage elements are 2701 allocated for every row of the local diagonal submatrix, and o_nz 2702 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2703 One way to choose d_nz and o_nz is to use the max nonzerors per local 2704 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2705 In this case, the values of d_nz,o_nz are: 2706 .vb 2707 proc0 : dnz = 2, o_nz = 2 2708 proc1 : dnz = 3, o_nz = 2 2709 proc2 : dnz = 1, o_nz = 4 2710 .ve 2711 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2712 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2713 for proc3. i.e we are using 12+15+10=37 storage locations to store 2714 34 values. 2715 2716 When d_nnz, o_nnz parameters are specified, the storage is specified 2717 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2718 In the above case the values for d_nnz,o_nnz are: 2719 .vb 2720 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2721 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2722 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2723 .ve 2724 Here the space allocated is sum of all the above values i.e 34, and 2725 hence pre-allocation is perfect. 2726 2727 Level: intermediate 2728 2729 .keywords: matrix, aij, compressed row, sparse, parallel 2730 2731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2732 MPIAIJ, MatCreateMPIAIJWithArrays() 2733 @*/ 2734 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2735 { 2736 PetscErrorCode ierr; 2737 PetscMPIInt size; 2738 2739 PetscFunctionBegin; 2740 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2741 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2742 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2743 if (size > 1) { 2744 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2745 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2746 } else { 2747 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2748 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2749 } 2750 PetscFunctionReturn(0); 2751 } 2752 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2756 { 2757 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2758 2759 PetscFunctionBegin; 2760 *Ad = a->A; 2761 *Ao = a->B; 2762 *colmap = a->garray; 2763 PetscFunctionReturn(0); 2764 } 2765 2766 #undef __FUNCT__ 2767 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2768 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2769 { 2770 PetscErrorCode ierr; 2771 PetscInt i; 2772 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2773 2774 PetscFunctionBegin; 2775 if (coloring->ctype == IS_COLORING_LOCAL) { 2776 ISColoringValue *allcolors,*colors; 2777 ISColoring ocoloring; 2778 2779 /* set coloring for diagonal portion */ 2780 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2781 2782 /* set coloring for off-diagonal portion */ 2783 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2784 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2785 for (i=0; i<a->B->cmap.n; i++) { 2786 colors[i] = allcolors[a->garray[i]]; 2787 } 2788 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2789 ierr = 
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2790 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2791 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2792 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2793 ISColoringValue *colors; 2794 PetscInt *larray; 2795 ISColoring ocoloring; 2796 2797 /* set coloring for diagonal portion */ 2798 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2799 for (i=0; i<a->A->cmap.n; i++) { 2800 larray[i] = i + A->cmap.rstart; 2801 } 2802 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2803 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2804 for (i=0; i<a->A->cmap.n; i++) { 2805 colors[i] = coloring->colors[larray[i]]; 2806 } 2807 ierr = PetscFree(larray);CHKERRQ(ierr); 2808 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2809 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2810 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2811 2812 /* set coloring for off-diagonal portion */ 2813 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2814 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2815 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2816 for (i=0; i<a->B->cmap.n; i++) { 2817 colors[i] = coloring->colors[larray[i]]; 2818 } 2819 ierr = PetscFree(larray);CHKERRQ(ierr); 2820 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2821 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2822 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2823 } else { 2824 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2825 } 2826 2827 PetscFunctionReturn(0); 2828 } 2829 2830 #if defined(PETSC_HAVE_ADIC) 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2833 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2834 { 2835 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2840 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 #endif 2844 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2847 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2848 { 2849 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2850 PetscErrorCode ierr; 2851 2852 PetscFunctionBegin; 2853 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2854 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatMerge" 2860 /*@C 2861 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2862 matrices from each processor 2863 2864 Collective on MPI_Comm 2865 2866 Input Parameters: 2867 + comm - the communicator the parallel matrix will live on 2868 . inmat - the input sequential matrix on this process 2869 . n - number of local columns (or PETSC_DECIDE) 2870 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2871 2872 Output Parameter: 2873 . outmat - the parallel matrix generated 2874 2875 Level: advanced 2876 2877 Notes: The number of columns of the matrix in EACH processor MUST be the same.
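
   A minimal usage sketch (mlocal, N and nz are placeholder values the caller would supply;
   error checking omitted):
.vb
      Mat seqmat,mpimat;
      MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,nz,PETSC_NULL,&seqmat);
      ... fill seqmat with MatSetValues() and assemble it ...
      MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve
   Note that MatMerge() destroys the input sequential matrix, so seqmat must not be used after the call.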
2878 2879 @*/ 2880 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2881 { 2882 PetscErrorCode ierr; 2883 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2884 PetscInt *indx; 2885 PetscScalar *values; 2886 2887 PetscFunctionBegin; 2888 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2889 if (scall == MAT_INITIAL_MATRIX){ 2890 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2891 if (n == PETSC_DECIDE){ 2892 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2893 } 2894 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2895 rstart -= m; 2896 2897 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2898 for (i=0;i<m;i++) { 2899 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2900 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2901 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2902 } 2903 /* This routine will ONLY return MPIAIJ type matrix */ 2904 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2905 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2906 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2907 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2908 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2909 2910 } else if (scall == MAT_REUSE_MATRIX){ 2911 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2912 } else { 2913 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2914 } 2915 2916 for (i=0;i<m;i++) { 2917 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2918 I = i + rstart; 2919 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2920 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2921 } 2922 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2923 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2924 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2925 2926 PetscFunctionReturn(0); 2927 } 2928 2929 #undef __FUNCT__ 2930 #define __FUNCT__ "MatFileSplit" 2931 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2932 { 2933 PetscErrorCode ierr; 2934 PetscMPIInt rank; 2935 PetscInt m,N,i,rstart,nnz; 2936 size_t len; 2937 const PetscInt *indx; 2938 PetscViewer out; 2939 char *name; 2940 Mat B; 2941 const PetscScalar *values; 2942 2943 PetscFunctionBegin; 2944 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2945 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2946 /* Should this be the type of the diagonal block of A? 
*/ 2947 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2948 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2949 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2950 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2951 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2952 for (i=0;i<m;i++) { 2953 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2954 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2955 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2956 } 2957 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2958 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 2960 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2961 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2962 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2963 sprintf(name,"%s.%d",outfile,rank); 2964 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2965 ierr = PetscFree(name); 2966 ierr = MatView(B,out);CHKERRQ(ierr); 2967 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2968 ierr = MatDestroy(B);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2975 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2976 { 2977 PetscErrorCode ierr; 2978 Mat_Merge_SeqsToMPI *merge; 2979 PetscObjectContainer container; 2980 2981 PetscFunctionBegin; 2982 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2983 if (container) { 2984 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2985 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2986 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2987 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2988 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2989 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2990 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2991 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2992 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 2993 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 2994 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 2995 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 2996 2997 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2998 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2999 } 3000 ierr = PetscFree(merge);CHKERRQ(ierr); 3001 3002 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #include "src/mat/utils/freespace.h" 3007 #include "petscbt.h" 3008 static PetscEvent logkey_seqstompinum = 0; 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3011 /*@C 3012 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3013 matrices from each processor 3014 3015 Collective on MPI_Comm 3016 3017 Input Parameters: 3018 + comm - the communicators the parallel matrix will live on 3019 . seqmat - the input sequential matrices 3020 . m - number of local rows (or PETSC_DECIDE) 3021 . n - number of local columns (or PETSC_DECIDE) 3022 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3023 3024 Output Parameter: 3025 . mpimat - the parallel matrix generated 3026 3027 Level: advanced 3028 3029 Notes: 3030 The dimensions of the sequential matrix in each processor MUST be the same. 
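   The symbolic and numeric routines are intended to be used as a pair; a rough sketch of the
   calling sequence (variable names are placeholders):
.vb
      Mat mpimat;
      MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
      MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve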
3031 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3032 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3033 @*/ 3034 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3035 { 3036 PetscErrorCode ierr; 3037 MPI_Comm comm=mpimat->comm; 3038 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3039 PetscMPIInt size,rank,taga,*len_s; 3040 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3041 PetscInt proc,m; 3042 PetscInt **buf_ri,**buf_rj; 3043 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3044 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3045 MPI_Request *s_waits,*r_waits; 3046 MPI_Status *status; 3047 MatScalar *aa=a->a,**abuf_r,*ba_i; 3048 Mat_Merge_SeqsToMPI *merge; 3049 PetscObjectContainer container; 3050 3051 PetscFunctionBegin; 3052 if (!logkey_seqstompinum) { 3053 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3054 } 3055 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3056 3057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3058 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3059 3060 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3061 if (container) { 3062 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3063 } 3064 bi = merge->bi; 3065 bj = merge->bj; 3066 buf_ri = merge->buf_ri; 3067 buf_rj = merge->buf_rj; 3068 3069 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3070 owners = merge->rowmap.range; 3071 len_s = merge->len_s; 3072 3073 /* send and recv matrix values */ 3074 /*-----------------------------*/ 3075 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3076 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3077 3078 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3079 for (proc=0,k=0; proc<size; proc++){ 3080 if (!len_s[proc]) continue; 3081 i = owners[proc]; 3082 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3083 k++; 3084 } 3085 3086 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3087 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3088 ierr = PetscFree(status);CHKERRQ(ierr); 3089 3090 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3091 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3092 3093 /* insert mat values of mpimat */ 3094 /*----------------------------*/ 3095 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3096 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3097 nextrow = buf_ri_k + merge->nrecv; 3098 nextai = nextrow + merge->nrecv; 3099 3100 for (k=0; k<merge->nrecv; k++){ 3101 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3102 nrows = *(buf_ri_k[k]); 3103 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3104 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3105 } 3106 3107 /* set values of ba */ 3108 m = merge->rowmap.n; 3109 for (i=0; i<m; i++) { 3110 arow = owners[rank] + i; 3111 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3112 bnzi = bi[i+1] - bi[i]; 3113 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3114 3115 /* add local non-zero vals of this proc's seqmat into ba */ 3116 anzi = 
ai[arow+1] - ai[arow]; 3117 aj = a->j + ai[arow]; 3118 aa = a->a + ai[arow]; 3119 nextaj = 0; 3120 for (j=0; nextaj<anzi; j++){ 3121 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3122 ba_i[j] += aa[nextaj++]; 3123 } 3124 } 3125 3126 /* add received vals into ba */ 3127 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3128 /* i-th row */ 3129 if (i == *nextrow[k]) { 3130 anzi = *(nextai[k]+1) - *nextai[k]; 3131 aj = buf_rj[k] + *(nextai[k]); 3132 aa = abuf_r[k] + *(nextai[k]); 3133 nextaj = 0; 3134 for (j=0; nextaj<anzi; j++){ 3135 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3136 ba_i[j] += aa[nextaj++]; 3137 } 3138 } 3139 nextrow[k]++; nextai[k]++; 3140 } 3141 } 3142 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3143 } 3144 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3145 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3146 3147 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3148 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3149 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3150 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3151 PetscFunctionReturn(0); 3152 } 3153 3154 static PetscEvent logkey_seqstompisym = 0; 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3157 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3158 { 3159 PetscErrorCode ierr; 3160 Mat B_mpi; 3161 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3162 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3163 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3164 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3165 PetscInt len,proc,*dnz,*onz; 3166 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3167 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3168 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3169 MPI_Status *status; 3170 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3171 PetscBT lnkbt; 3172 Mat_Merge_SeqsToMPI *merge; 3173 PetscObjectContainer container; 3174 3175 PetscFunctionBegin; 3176 if (!logkey_seqstompisym) { 3177 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3178 } 3179 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3180 3181 /* make sure it is a PETSc comm */ 3182 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3184 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3185 3186 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3187 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3188 3189 /* determine row ownership */ 3190 /*---------------------------------------------------------*/ 3191 merge->rowmap.n = m; 3192 merge->rowmap.N = M; 3193 merge->rowmap.bs = 1; 3194 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3195 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3196 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3197 3198 m = merge->rowmap.n; 3199 M = merge->rowmap.N; 3200 owners = merge->rowmap.range; 3201 3202 /* determine the number of messages to send, their lengths */ 3203 /*---------------------------------------------------------*/ 3204 len_s = merge->len_s; 3205 3206 len = 0; /* length of buf_si[] */ 3207 merge->nsend = 0; 3208 for (proc=0; proc<size; proc++){ 3209 len_si[proc] = 0; 3210 if (proc == rank){ 3211 
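/* nothing is sent to the local process itself; its own rows of seqmat are added directly in the numeric phase */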
len_s[proc] = 0; 3212 } else { 3213 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3214 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3215 } 3216 if (len_s[proc]) { 3217 merge->nsend++; 3218 nrows = 0; 3219 for (i=owners[proc]; i<owners[proc+1]; i++){ 3220 if (ai[i+1] > ai[i]) nrows++; 3221 } 3222 len_si[proc] = 2*(nrows+1); 3223 len += len_si[proc]; 3224 } 3225 } 3226 3227 /* determine the number and length of messages to receive for ij-structure */ 3228 /*-------------------------------------------------------------------------*/ 3229 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3230 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3231 3232 /* post the Irecv of j-structure */ 3233 /*-------------------------------*/ 3234 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3235 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3236 3237 /* post the Isend of j-structure */ 3238 /*--------------------------------*/ 3239 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3240 sj_waits = si_waits + merge->nsend; 3241 3242 for (proc=0, k=0; proc<size; proc++){ 3243 if (!len_s[proc]) continue; 3244 i = owners[proc]; 3245 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3246 k++; 3247 } 3248 3249 /* receives and sends of j-structure are complete */ 3250 /*------------------------------------------------*/ 3251 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3252 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3253 3254 /* send and recv i-structure */ 3255 /*---------------------------*/ 3256 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3257 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3258 3259 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3260 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3261 for (proc=0,k=0; proc<size; proc++){ 3262 if (!len_s[proc]) continue; 3263 /* form outgoing message for i-structure: 3264 buf_si[0]: nrows to be sent 3265 [1:nrows]: row index (global) 3266 [nrows+1:2*nrows+1]: i-structure index 3267 */ 3268 /*-------------------------------------------*/ 3269 nrows = len_si[proc]/2 - 1; 3270 buf_si_i = buf_si + nrows+1; 3271 buf_si[0] = nrows; 3272 buf_si_i[0] = 0; 3273 nrows = 0; 3274 for (i=owners[proc]; i<owners[proc+1]; i++){ 3275 anzi = ai[i+1] - ai[i]; 3276 if (anzi) { 3277 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3278 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3279 nrows++; 3280 } 3281 } 3282 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3283 k++; 3284 buf_si += len_si[proc]; 3285 } 3286 3287 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3288 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3289 3290 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3291 for (i=0; i<merge->nrecv; i++){ 3292 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3293 } 3294 3295 ierr = PetscFree(len_si);CHKERRQ(ierr); 3296 ierr = 
PetscFree(len_ri);CHKERRQ(ierr); 3297 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3298 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3299 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3300 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3301 ierr = PetscFree(status);CHKERRQ(ierr); 3302 3303 /* compute a local seq matrix in each processor */ 3304 /*----------------------------------------------*/ 3305 /* allocate bi array and free space for accumulating nonzero column info */ 3306 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3307 bi[0] = 0; 3308 3309 /* create and initialize a linked list */ 3310 nlnk = N+1; 3311 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3312 3313 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3314 len = 0; 3315 len = ai[owners[rank+1]] - ai[owners[rank]]; 3316 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3317 current_space = free_space; 3318 3319 /* determine symbolic info for each local row */ 3320 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3321 nextrow = buf_ri_k + merge->nrecv; 3322 nextai = nextrow + merge->nrecv; 3323 for (k=0; k<merge->nrecv; k++){ 3324 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3325 nrows = *buf_ri_k[k]; 3326 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3327 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3328 } 3329 3330 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3331 len = 0; 3332 for (i=0;i<m;i++) { 3333 bnzi = 0; 3334 /* add local non-zero cols of this proc's seqmat into lnk */ 3335 arow = owners[rank] + i; 3336 anzi = ai[arow+1] - ai[arow]; 3337 aj = a->j + ai[arow]; 3338 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3339 bnzi += nlnk; 3340 /* add received col data into lnk */ 3341 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3342 if (i == *nextrow[k]) { /* i-th row */ 3343 anzi = *(nextai[k]+1) - *nextai[k]; 3344 aj = buf_rj[k] + *nextai[k]; 3345 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3346 bnzi += nlnk; 3347 nextrow[k]++; nextai[k]++; 3348 } 3349 } 3350 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3351 3352 /* if free space is not available, make more free space */ 3353 if (current_space->local_remaining<bnzi) { 3354 ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3355 nspacedouble++; 3356 } 3357 /* copy data into free space, then initialize lnk */ 3358 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3359 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3360 3361 current_space->array += bnzi; 3362 current_space->local_used += bnzi; 3363 current_space->local_remaining -= bnzi; 3364 3365 bi[i+1] = bi[i] + bnzi; 3366 } 3367 3368 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3369 3370 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3371 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3373 3374 /* create symbolic parallel matrix B_mpi */ 3375 /*---------------------------------------*/ 3376 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3377 if (n==PETSC_DECIDE) { 3378 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3379 } else { 3380 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3381 } 3382 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3383 ierr =
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3384 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3385 3386 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3387 B_mpi->assembled = PETSC_FALSE; 3388 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3389 merge->bi = bi; 3390 merge->bj = bj; 3391 merge->buf_ri = buf_ri; 3392 merge->buf_rj = buf_rj; 3393 merge->coi = PETSC_NULL; 3394 merge->coj = PETSC_NULL; 3395 merge->owners_co = PETSC_NULL; 3396 3397 /* attach the supporting struct to B_mpi for reuse */ 3398 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3399 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3400 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3401 *mpimat = B_mpi; 3402 3403 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3404 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3405 PetscFunctionReturn(0); 3406 } 3407 3408 static PetscEvent logkey_seqstompi = 0; 3409 #undef __FUNCT__ 3410 #define __FUNCT__ "MatMerge_SeqsToMPI" 3411 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3412 { 3413 PetscErrorCode ierr; 3414 3415 PetscFunctionBegin; 3416 if (!logkey_seqstompi) { 3417 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3418 } 3419 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3420 if (scall == MAT_INITIAL_MATRIX){ 3421 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3422 } 3423 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3424 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3425 PetscFunctionReturn(0); 3426 } 3427 static PetscEvent logkey_getlocalmat = 0; 3428 #undef __FUNCT__ 3429 #define __FUNCT__ "MatGetLocalMat" 3430 /*@C 3431 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3432 3433 Not Collective 3434 3435 Input Parameters: 3436 + A - the matrix 3437 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3438 3439 Output Parameter: 3440 . 
A_loc - the local sequential matrix generated 3441 3442 Level: developer 3443 3444 @*/ 3445 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3446 { 3447 PetscErrorCode ierr; 3448 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3449 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3450 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3451 PetscScalar *aa=a->a,*ba=b->a,*ca; 3452 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3453 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3454 3455 PetscFunctionBegin; 3456 if (!logkey_getlocalmat) { 3457 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3458 } 3459 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3460 if (scall == MAT_INITIAL_MATRIX){ 3461 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3462 ci[0] = 0; 3463 for (i=0; i<am; i++){ 3464 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3465 } 3466 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3467 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3468 k = 0; 3469 for (i=0; i<am; i++) { 3470 ncols_o = bi[i+1] - bi[i]; 3471 ncols_d = ai[i+1] - ai[i]; 3472 /* off-diagonal portion of A */ 3473 for (jo=0; jo<ncols_o; jo++) { 3474 col = cmap[*bj]; 3475 if (col >= cstart) break; 3476 cj[k] = col; bj++; 3477 ca[k++] = *ba++; 3478 } 3479 /* diagonal portion of A */ 3480 for (j=0; j<ncols_d; j++) { 3481 cj[k] = cstart + *aj++; 3482 ca[k++] = *aa++; 3483 } 3484 /* off-diagonal portion of A */ 3485 for (j=jo; j<ncols_o; j++) { 3486 cj[k] = cmap[*bj++]; 3487 ca[k++] = *ba++; 3488 } 3489 } 3490 /* put together the new matrix */ 3491 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3492 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3493 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3494 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3495 mat->freedata = PETSC_TRUE; 3496 mat->nonew = 0; 3497 } else if (scall == MAT_REUSE_MATRIX){ 3498 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3499 ci = mat->i; cj = mat->j; ca = mat->a; 3500 for (i=0; i<am; i++) { 3501 /* off-diagonal portion of A */ 3502 ncols_o = bi[i+1] - bi[i]; 3503 for (jo=0; jo<ncols_o; jo++) { 3504 col = cmap[*bj]; 3505 if (col >= cstart) break; 3506 *ca++ = *ba++; bj++; 3507 } 3508 /* diagonal portion of A */ 3509 ncols_d = ai[i+1] - ai[i]; 3510 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3511 /* off-diagonal portion of A */ 3512 for (j=jo; j<ncols_o; j++) { 3513 *ca++ = *ba++; bj++; 3514 } 3515 } 3516 } else { 3517 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3518 } 3519 3520 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3521 PetscFunctionReturn(0); 3522 } 3523 3524 static PetscEvent logkey_getlocalmatcondensed = 0; 3525 #undef __FUNCT__ 3526 #define __FUNCT__ "MatGetLocalMatCondensed" 3527 /*@C 3528 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3529 3530 Not Collective 3531 3532 Input Parameters: 3533 + A - the matrix 3534 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3535 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3536 3537 Output Parameter: 3538 . 
A_loc - the local sequential matrix generated 3539 3540 Level: developer 3541 3542 @*/ 3543 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3544 { 3545 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3546 PetscErrorCode ierr; 3547 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3548 IS isrowa,iscola; 3549 Mat *aloc; 3550 3551 PetscFunctionBegin; 3552 if (!logkey_getlocalmatcondensed) { 3553 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3554 } 3555 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3556 if (!row){ 3557 start = A->rmap.rstart; end = A->rmap.rend; 3558 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3559 } else { 3560 isrowa = *row; 3561 } 3562 if (!col){ 3563 start = A->cmap.rstart; 3564 cmap = a->garray; 3565 nzA = a->A->cmap.n; 3566 nzB = a->B->cmap.n; 3567 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3568 ncols = 0; 3569 for (i=0; i<nzB; i++) { 3570 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3571 else break; 3572 } 3573 imark = i; 3574 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3575 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3576 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3577 ierr = PetscFree(idx);CHKERRQ(ierr); 3578 } else { 3579 iscola = *col; 3580 } 3581 if (scall != MAT_INITIAL_MATRIX){ 3582 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3583 aloc[0] = *A_loc; 3584 } 3585 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3586 *A_loc = aloc[0]; 3587 ierr = PetscFree(aloc);CHKERRQ(ierr); 3588 if (!row){ 3589 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3590 } 3591 if (!col){ 3592 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3593 } 3594 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3595 PetscFunctionReturn(0); 3596 } 3597 3598 static PetscEvent logkey_GetBrowsOfAcols = 0; 3599 #undef __FUNCT__ 3600 #define __FUNCT__ "MatGetBrowsOfAcols" 3601 /*@C 3602 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3603 3604 Collective on Mat 3605 3606 Input Parameters: 3607 + A,B - the matrices in mpiaij format 3608 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3609 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3610 3611 Output Parameter: 3612 + rowb, colb - index sets of rows and columns of B to extract 3613 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3614 - B_seq - the sequential matrix generated 3615 3616 Level: developer 3617 3618 @*/ 3619 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3620 { 3621 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3622 PetscErrorCode ierr; 3623 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3624 IS isrowb,iscolb; 3625 Mat *bseq; 3626 3627 PetscFunctionBegin; 3628 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3629 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3630 } 3631 if (!logkey_GetBrowsOfAcols) { 3632 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3633 } 3634 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3635 3636 if (scall == MAT_INITIAL_MATRIX){ 3637 start = A->cmap.rstart; 3638 cmap = a->garray; 3639 nzA = a->A->cmap.n; 3640 nzB = a->B->cmap.n; 3641 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3642 ncols = 0; 3643 for (i=0; i<nzB; i++) { /* row < local row index */ 3644 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3645 else break; 3646 } 3647 imark = i; 3648 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3649 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3650 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3651 ierr = PetscFree(idx);CHKERRQ(ierr); 3652 *brstart = imark; 3653 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3654 } else { 3655 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3656 isrowb = *rowb; iscolb = *colb; 3657 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3658 bseq[0] = *B_seq; 3659 } 3660 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3661 *B_seq = bseq[0]; 3662 ierr = PetscFree(bseq);CHKERRQ(ierr); 3663 if (!rowb){ 3664 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3665 } else { 3666 *rowb = isrowb; 3667 } 3668 if (!colb){ 3669 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3670 } else { 3671 *colb = iscolb; 3672 } 3673 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3674 PetscFunctionReturn(0); 3675 } 3676 3677 static PetscEvent logkey_GetBrowsOfAocols = 0; 3678 #undef __FUNCT__ 3679 #define __FUNCT__ "MatGetBrowsOfAoCols" 3680 /*@C 3681 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3682 of the OFF-DIAGONAL portion of local A 3683 3684 Collective on Mat 3685 3686 Input Parameters: 3687 + A,B - the matrices in mpiaij format 3688 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3689 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3690 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3691 3692 Output Parameter: 3693 + B_oth - the sequential matrix generated 3694 3695 Level: developer 3696 3697 @*/ 3698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3699 { 3700 VecScatter_MPI_General *gen_to,*gen_from; 3701 PetscErrorCode ierr; 3702 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3703 Mat_SeqAIJ *b_oth; 3704 VecScatter ctx=a->Mvctx; 3705 MPI_Comm comm=ctx->comm; 3706 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3707 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3708 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3709 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3710 MPI_Request *rwaits,*swaits; 3711 MPI_Status *sstatus,rstatus; 3712 PetscInt *cols; 3713 PetscScalar *vals; 3714 PetscMPIInt j; 3715 3716 PetscFunctionBegin; 3717 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3718 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3719 } 3720 if (!logkey_GetBrowsOfAocols) { 3721 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3722 } 3723 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3724 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3725 3726 gen_to = (VecScatter_MPI_General*)ctx->todata; 3727 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3728 rvalues = gen_from->values; /* holds the length of sending row */ 3729 svalues = gen_to->values; /* holds the length of receiving row */ 3730 nrecvs = gen_from->n; 3731 nsends = gen_to->n; 3732 3733 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3734 srow = gen_to->indices; /* local row index to be sent */ 3735 rstarts = gen_from->starts; 3736 sstarts = gen_to->starts; 3737 rprocs = gen_from->procs; 3738 sprocs = gen_to->procs; 3739 sstatus = gen_to->sstatus; 3740 3741 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3742 if (scall == MAT_INITIAL_MATRIX){ 3743 /* i-array */ 3744 /*---------*/ 3745 /* post receives */ 3746 for (i=0; i<nrecvs; i++){ 3747 rowlen = (PetscInt*)rvalues + rstarts[i]; 3748 nrows = rstarts[i+1]-rstarts[i]; 3749 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3750 } 3751 3752 /* pack the outgoing message */ 3753 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3754 rstartsj = sstartsj + nsends +1; 3755 sstartsj[0] = 0; rstartsj[0] = 0; 3756 len = 0; /* total length of j or a array to be sent */ 3757 k = 0; 3758 for (i=0; i<nsends; i++){ 3759 rowlen = (PetscInt*)svalues + sstarts[i]; 3760 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3761 for (j=0; j<nrows; j++) { 3762 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3763 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3764 len += rowlen[j]; 3765 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3766 k++; 3767 } 3768 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3769 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3770 } 3771 /* recvs and sends of i-array are completed 
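(one length per requested row of B; the lengths received here determine the sizes of the j- and a-messages exchanged below)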
*/ 3772 i = nrecvs; 3773 while (i--) { 3774 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3775 } 3776 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3777 /* allocate buffers for sending j and a arrays */ 3778 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3779 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3780 3781 /* create i-array of B_oth */ 3782 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3783 b_othi[0] = 0; 3784 len = 0; /* total length of j or a array to be received */ 3785 k = 0; 3786 for (i=0; i<nrecvs; i++){ 3787 rowlen = (PetscInt*)rvalues + rstarts[i]; 3788 nrows = rstarts[i+1]-rstarts[i]; 3789 for (j=0; j<nrows; j++) { 3790 b_othi[k+1] = b_othi[k] + rowlen[j]; 3791 len += rowlen[j]; k++; 3792 } 3793 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3794 } 3795 3796 /* allocate space for j and a arrrays of B_oth */ 3797 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3798 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3799 3800 /* j-array */ 3801 /*---------*/ 3802 /* post receives of j-array */ 3803 for (i=0; i<nrecvs; i++){ 3804 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3805 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3806 } 3807 k = 0; 3808 for (i=0; i<nsends; i++){ 3809 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3810 bufJ = bufj+sstartsj[i]; 3811 for (j=0; j<nrows; j++) { 3812 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3813 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3814 for (l=0; l<ncols; l++){ 3815 *bufJ++ = cols[l]; 3816 } 3817 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3818 } 3819 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3820 } 3821 3822 /* recvs and sends of j-array are completed */ 3823 i = nrecvs; 3824 while (i--) { 3825 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3826 } 3827 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3828 } else if (scall == MAT_REUSE_MATRIX){ 3829 sstartsj = *startsj; 3830 rstartsj = sstartsj + nsends +1; 3831 bufa = *bufa_ptr; 3832 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3833 b_otha = b_oth->a; 3834 } else { 3835 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3836 } 3837 3838 /* a-array */ 3839 /*---------*/ 3840 /* post receives of a-array */ 3841 for (i=0; i<nrecvs; i++){ 3842 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3843 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3844 } 3845 k = 0; 3846 for (i=0; i<nsends; i++){ 3847 nrows = sstarts[i+1]-sstarts[i]; 3848 bufA = bufa+sstartsj[i]; 3849 for (j=0; j<nrows; j++) { 3850 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3851 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3852 for (l=0; l<ncols; l++){ 3853 *bufA++ = vals[l]; 3854 } 3855 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3856 3857 } 3858 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3859 } 3860 /* recvs and sends of a-array are completed */ 3861 i = nrecvs; 3862 while (i--) { 3863 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3864 } 3865 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3866 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3867 3868 if (scall == MAT_INITIAL_MATRIX){ 3869 /* put together the new matrix */ 3870 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3871 3872 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3873 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3874 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3875 b_oth->freedata = PETSC_TRUE; 3876 b_oth->nonew = 0; 3877 3878 ierr = PetscFree(bufj);CHKERRQ(ierr); 3879 if (!startsj || !bufa_ptr){ 3880 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3881 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3882 } else { 3883 *startsj = sstartsj; 3884 *bufa_ptr = bufa; 3885 } 3886 } 3887 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3888 3889 PetscFunctionReturn(0); 3890 } 3891 3892 #undef __FUNCT__ 3893 #define __FUNCT__ "MatGetCommunicationStructs" 3894 /*@C 3895 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3896 3897 Not Collective 3898 3899 Input Parameters: 3900 . A - The matrix in mpiaij format 3901 3902 Output Parameter: 3903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3904 . colmap - A map from global column index to local index into lvec 3905 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3906 3907 Level: developer 3908 3909 @*/ 3910 #if defined (PETSC_USE_CTABLE) 3911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3912 #else 3913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3914 #endif 3915 { 3916 Mat_MPIAIJ *a; 3917 3918 PetscFunctionBegin; 3919 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3920 PetscValidPointer(lvec, 2) 3921 PetscValidPointer(colmap, 3) 3922 PetscValidPointer(multScatter, 4) 3923 a = (Mat_MPIAIJ *) A->data; 3924 if (lvec) *lvec = a->lvec; 3925 if (colmap) *colmap = a->colmap; 3926 if (multScatter) *multScatter = a->Mvctx; 3927 PetscFunctionReturn(0); 3928 } 3929 3930 /*MC 3931 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3932 3933 Options Database Keys: 3934 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3935 3936 Level: beginner 3937 3938 .seealso: MatCreateMPIAIJ 3939 M*/ 3940 3941 EXTERN_C_BEGIN 3942 #undef __FUNCT__ 3943 #define __FUNCT__ "MatCreate_MPIAIJ" 3944 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3945 { 3946 Mat_MPIAIJ *b; 3947 PetscErrorCode ierr; 3948 PetscMPIInt size; 3949 3950 PetscFunctionBegin; 3951 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3952 3953 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3954 B->data = (void*)b; 3955 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3956 B->factor = 0; 3957 B->rmap.bs = 1; 3958 B->assembled = PETSC_FALSE; 3959 B->mapping = 0; 3960 3961 B->insertmode = NOT_SET_VALUES; 3962 b->size = size; 3963 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3964 3965 /* build cache for off array entries formed */ 3966 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3967 b->donotstash = PETSC_FALSE; 3968 b->colmap = 0; 3969 b->garray = 0; 3970 b->roworiented = PETSC_TRUE; 3971 3972 /* stuff used for matrix vector multiply */ 3973 b->lvec = PETSC_NULL; 3974 b->Mvctx = PETSC_NULL; 3975 3976 /* stuff for MatGetRow() */ 3977 b->rowindices = 0; 3978 b->rowvalues = 0; 3979 b->getrowactive = PETSC_FALSE; 3980 3981 3982 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3983 "MatStoreValues_MPIAIJ", 3984 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3985 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3986 "MatRetrieveValues_MPIAIJ", 3987 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3988 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3989 "MatGetDiagonalBlock_MPIAIJ", 3990 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3991 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3992 "MatIsTranspose_MPIAIJ", 3993 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3994 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3995 "MatMPIAIJSetPreallocation_MPIAIJ", 3996 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3997 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3998 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3999 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4000 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4001 "MatDiagonalScaleLocal_MPIAIJ", 4002 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4003 PetscFunctionReturn(0); 4004 } 4005 EXTERN_C_END 4006 4007 /* 4008 Special version for direct calls from Fortran 4009 */ 4010 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4011 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4012 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4013 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4014 #endif 4015 4016 /* Change these macros so can be used in void function */ 4017 #undef CHKERRQ 4018 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4019 #undef SETERRQ2 4020 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4021 #undef SETERRQ 4022 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4023 4024 EXTERN_C_BEGIN 4025 #undef __FUNCT__ 4026 #define __FUNCT__ "matsetvaluesmpiaij_" 4027 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 4028 { 4029 Mat mat = *mmat; 4030 PetscInt m = *mm, n = *mn; 4031 InsertMode addv = *maddv; 4032 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4033 
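/* The body below mirrors MatSetValues_MPIAIJ(); because this direct Fortran entry point returns void,
   CHKERRQ()/SETERRQ() were redefined above to CHKERRABORT() so errors abort instead of returning a code. */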
PetscScalar value; 4034 PetscErrorCode ierr; 4035 4036 MatPreallocated(mat); 4037 if (mat->insertmode == NOT_SET_VALUES) { 4038 mat->insertmode = addv; 4039 } 4040 #if defined(PETSC_USE_DEBUG) 4041 else if (mat->insertmode != addv) { 4042 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4043 } 4044 #endif 4045 { 4046 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4047 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4048 PetscTruth roworiented = aij->roworiented; 4049 4050 /* Some Variables required in the macro */ 4051 Mat A = aij->A; 4052 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4053 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4054 PetscScalar *aa = a->a; 4055 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4056 Mat B = aij->B; 4057 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4058 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4059 PetscScalar *ba = b->a; 4060 4061 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4062 PetscInt nonew = a->nonew; 4063 PetscScalar *ap1,*ap2; 4064 4065 PetscFunctionBegin; 4066 for (i=0; i<m; i++) { 4067 if (im[i] < 0) continue; 4068 #if defined(PETSC_USE_DEBUG) 4069 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4070 #endif 4071 if (im[i] >= rstart && im[i] < rend) { 4072 row = im[i] - rstart; 4073 lastcol1 = -1; 4074 rp1 = aj + ai[row]; 4075 ap1 = aa + ai[row]; 4076 rmax1 = aimax[row]; 4077 nrow1 = ailen[row]; 4078 low1 = 0; 4079 high1 = nrow1; 4080 lastcol2 = -1; 4081 rp2 = bj + bi[row]; 4082 ap2 = ba + bi[row]; 4083 rmax2 = bimax[row]; 4084 nrow2 = bilen[row]; 4085 low2 = 0; 4086 high2 = nrow2; 4087 4088 for (j=0; j<n; j++) { 4089 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4090 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4091 if (in[j] >= cstart && in[j] < cend){ 4092 col = in[j] - cstart; 4093 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4094 } else if (in[j] < 0) continue; 4095 #if defined(PETSC_USE_DEBUG) 4096 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4097 #endif 4098 else { 4099 if (mat->was_assembled) { 4100 if (!aij->colmap) { 4101 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4102 } 4103 #if defined (PETSC_USE_CTABLE) 4104 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4105 col--; 4106 #else 4107 col = aij->colmap[in[j]] - 1; 4108 #endif 4109 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4110 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4111 col = in[j]; 4112 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4113 B = aij->B; 4114 b = (Mat_SeqAIJ*)B->data; 4115 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4116 rp2 = bj + bi[row]; 4117 ap2 = ba + bi[row]; 4118 rmax2 = bimax[row]; 4119 nrow2 = bilen[row]; 4120 low2 = 0; 4121 high2 = nrow2; 4122 bm = aij->B->rmap.n; 4123 ba = b->a; 4124 } 4125 } else col = in[j]; 4126 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4127 } 4128 } 4129 } else { 4130 if (!aij->donotstash) { 4131 if (roworiented) { 4132 if (ignorezeroentries && v[i*n] == 0.0) continue; 4133 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4134 } else { 4135 if (ignorezeroentries && v[i] == 0.0) continue; 4136 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4137 } 4138 } 4139 } 4140 }} 4141 PetscFunctionReturnVoid(); 4142 } 4143 EXTERN_C_END 4144
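
/*
   Usage sketch (illustrative only, not part of the library): how the local-extraction helpers
   above are typically combined by the parallel matrix-matrix product routines.  A and B are
   MPIAIJ matrices with compatible layouts; A_loc, B_oth, startsj and bufa are names introduced
   here only for the example.

     Mat          A_loc,B_oth;
     PetscInt     *startsj = PETSC_NULL;
     PetscScalar  *bufa    = PETSC_NULL;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);                        (this process's rows of A)
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);  (rows of B matching A's off-diagonal columns)

     ... use A_loc and B_oth ...

     After the numerical values of A and B change but the nonzero patterns stay the same,
     the same storage can be refilled:

     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);

     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/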