#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this mapping is
   scalable, at a slightly higher hash-table lookup cost; without it the
   mapping is not scalable (each process holds an integer array of length
   equal to the global number of columns) but lookups are fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
  if (nonew == 1) goto a_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
  if (nonew == 1) goto b_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
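/*
   Commentary (added): both macros above implement the same single-entry
   insertion scheme for a SeqAIJ block, A being the diagonal and B the
   off-diagonal part of the local rows:
     1. shrink the search window using the last inserted column, since
        callers frequently supply columns in ascending order;
     2. binary-search until at most 5 candidates remain, then scan them
        linearly;
     3. on a hit, add to or overwrite the stored value; on a miss, either
        skip (zero value with ignorezeroentries, or nonew == 1), raise an
        error (nonew == -1), or grow the row with MatSeqXAIJReallocateAIJ()
        and shift the later entries up one slot so columns stay sorted.
*/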
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
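            /*
               Commentary (added): colmap stores local indices shifted up
               by one (see CreateColmap_MPIAIJ_Private()), so a lookup
               result of zero -- the "not found" value both for PetscTable
               and for the zeroed array -- becomes col == -1 here, meaning
               the global column is not yet present in the off-diagonal
               block.
            */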
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same InsertMode (INSERT_VALUES or ADD_VALUES) */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */
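  /*
     Commentary (added): values destined for rows owned by other processes
     were not applied by MatSetValues_MPIAIJ(); they were buffered in
     mat->stash. The scatter begun below ships each buffered (row,col,value)
     triple to the owning process; MatAssemblyEnd_MPIAIJ() receives them and
     replays them through MatSetValues_MPIAIJ().
  */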
  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we can reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
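/*
   Commentary (added): MatZeroRows_MPIAIJ() below accepts global row numbers
   that may be owned by any process, so it first routes each requested row to
   its owner:
     1. scan 'rows' and count, per process, how many entries it owns
        (nprocs), remembering the owner of each entry (owner);
     2. PetscMaxSum() tells every process how many messages to expect and
        the maximum message length;
     3. row numbers are exchanged with nonblocking MPI_Irecv()/MPI_Isend();
     4. received rows are shifted to local numbering (lrows) and zeroed via
        MatZeroRows() on the diagonal and off-diagonal blocks.
*/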
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry
     (which could allocate new memory, triggering many mallocs and slowing
     things down).

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
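/*
   Commentary (added): the two multiply routines above overlap communication
   with computation. With the local rows written as [A B], where A is the
   diagonal block and B the (column-compressed) off-diagonal block, the
   local piece of y = M*x is

       y_local = A*x_local + B*x_ghost

   The scatter filling the ghost vector a->lvec is started, A*x_local is
   computed while the messages are in flight, the scatter is completed, and
   finally B*x_ghost is added in.
*/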
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values were actually received in the Begin(), but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M, N, first, last, *notme, i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
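/*
   Commentary (added): for ASCII and draw viewers on more than one process,
   the routine above gathers the whole matrix onto process 0 (note the
   MatSetSizes(A,M,N,M,N) on rank 0 versus 0,0 elsewhere), so it is only
   practical when the matrix fits in one process's memory; binary viewing
   streams the data through rank 0 instead and does not have this limit.
*/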
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
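/*
   Commentary (added): MatRelax_MPIAIJ() above implements only the "local"
   SOR variants. Each outer iteration freezes the off-process values, forms
   the modified right-hand side

       bb1 = bb - B*x_ghost

   and then runs 'lits' SOR sweeps on the diagonal block A alone; i.e. it is
   a block Jacobi iteration between processes with SOR as the local solver.
   A true SOR ordering across process boundaries is not supported.
*/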
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
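/*
   Commentary (added): MatGetRow_MPIAIJ() below returns a row of the global
   matrix with ascending global column indices. Because a local row is
   stored in two pieces -- the diagonal block A (local column numbering
   offset by cstart) and the off-diagonal block B (compressed columns
   translated through cmap = garray) -- the row is stitched together as:
   B-entries left of the diagonal block, then all A-entries, then the
   remaining B-entries to the right.
*/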
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
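/*
   Commentary (added): MatNorm_MPIAIJ() below combines per-process partial
   results with a single reduction each:
     Frobenius:  ||M||_F   = sqrt( sum_ij |m_ij|^2 ); local sums of squares
                 are combined with MPI_SUM and the root taken afterwards;
     1-norm:     ||M||_1   = max_j sum_i |m_ij| (max column sum); per-column
                 absolute sums are accumulated into a length-N array
                 (off-diagonal entries mapped through garray) and combined
                 with MPI_SUM before taking the max;
     inf-norm:   ||M||_inf = max_i sum_j |m_ij| (max row sum); local row sums
                 only need an MPI_MAX reduction since rows are not split
                 across processes.
*/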
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = A->rmap.rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
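/*
   Commentary (added): MatDiagonalScale_MPIAIJ() below computes
   diag(ll)*M*diag(rr) while overlapping communication with computation:
   the scatter of rr into ghost ordering is started first, the left scaling
   of the off-diagonal block and the full scaling of the diagonal block run
   while the messages are in flight, and only then is the scatter finished
   so the off-diagonal block's columns can be scaled by the ghosted rr
   values.
*/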
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
If need be, we can provide a more 1578 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1579 then copying the submatrices */ 1580 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1581 } else { 1582 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1583 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1590 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1591 { 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #include "petscblaslapack.h" 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatAXPY_MPIAIJ" 1602 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1603 { 1604 PetscErrorCode ierr; 1605 PetscInt i; 1606 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1607 PetscBLASInt bnz,one=1; 1608 Mat_SeqAIJ *x,*y; 1609 1610 PetscFunctionBegin; 1611 if (str == SAME_NONZERO_PATTERN) { 1612 PetscScalar alpha = a; 1613 x = (Mat_SeqAIJ *)xx->A->data; 1614 y = (Mat_SeqAIJ *)yy->A->data; 1615 bnz = (PetscBLASInt)x->nz; 1616 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1617 x = (Mat_SeqAIJ *)xx->B->data; 1618 y = (Mat_SeqAIJ *)yy->B->data; 1619 bnz = (PetscBLASInt)x->nz; 1620 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1621 } else if (str == SUBSET_NONZERO_PATTERN) { 1622 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1623 1624 x = (Mat_SeqAIJ *)xx->B->data; 1625 y = (Mat_SeqAIJ *)yy->B->data; 1626 if (y->xtoy && y->XtoY != xx->B) { 1627 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1628 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1629 } 1630 if (!y->xtoy) { /* get xtoy */ 1631 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1632 y->XtoY = xx->B; 1633 } 1634 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1635 } else { 1636 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatConjugate_MPIAIJ" 1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1646 { 1647 #if defined(PETSC_USE_COMPLEX) 1648 PetscErrorCode ierr; 1649 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1650 1651 PetscFunctionBegin; 1652 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1653 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1654 #else 1655 PetscFunctionBegin; 1656 #endif 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatRealPart_MPIAIJ" 1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1663 { 1664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1665 PetscErrorCode ierr; 1666 1667 PetscFunctionBegin; 1668 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1669 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1676 { 1677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1682 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 /* -------------------------------------------------------------------*/ 1687 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1688 
MatGetRow_MPIAIJ, 1689 MatRestoreRow_MPIAIJ, 1690 MatMult_MPIAIJ, 1691 /* 4*/ MatMultAdd_MPIAIJ, 1692 MatMultTranspose_MPIAIJ, 1693 MatMultTransposeAdd_MPIAIJ, 1694 0, 1695 0, 1696 0, 1697 /*10*/ 0, 1698 0, 1699 0, 1700 MatRelax_MPIAIJ, 1701 MatTranspose_MPIAIJ, 1702 /*15*/ MatGetInfo_MPIAIJ, 1703 MatEqual_MPIAIJ, 1704 MatGetDiagonal_MPIAIJ, 1705 MatDiagonalScale_MPIAIJ, 1706 MatNorm_MPIAIJ, 1707 /*20*/ MatAssemblyBegin_MPIAIJ, 1708 MatAssemblyEnd_MPIAIJ, 1709 0, 1710 MatSetOption_MPIAIJ, 1711 MatZeroEntries_MPIAIJ, 1712 /*25*/ MatZeroRows_MPIAIJ, 1713 0, 1714 0, 1715 0, 1716 0, 1717 /*30*/ MatSetUpPreallocation_MPIAIJ, 1718 0, 1719 0, 1720 0, 1721 0, 1722 /*35*/ MatDuplicate_MPIAIJ, 1723 0, 1724 0, 1725 0, 1726 0, 1727 /*40*/ MatAXPY_MPIAIJ, 1728 MatGetSubMatrices_MPIAIJ, 1729 MatIncreaseOverlap_MPIAIJ, 1730 MatGetValues_MPIAIJ, 1731 MatCopy_MPIAIJ, 1732 /*45*/ 0, 1733 MatScale_MPIAIJ, 1734 0, 1735 0, 1736 0, 1737 /*50*/ MatSetBlockSize_MPIAIJ, 1738 0, 1739 0, 1740 0, 1741 0, 1742 /*55*/ MatFDColoringCreate_MPIAIJ, 1743 0, 1744 MatSetUnfactored_MPIAIJ, 1745 MatPermute_MPIAIJ, 1746 0, 1747 /*60*/ MatGetSubMatrix_MPIAIJ, 1748 MatDestroy_MPIAIJ, 1749 MatView_MPIAIJ, 1750 0, 1751 0, 1752 /*65*/ 0, 1753 0, 1754 0, 1755 0, 1756 0, 1757 /*70*/ 0, 1758 0, 1759 MatSetColoring_MPIAIJ, 1760 #if defined(PETSC_HAVE_ADIC) 1761 MatSetValuesAdic_MPIAIJ, 1762 #else 1763 0, 1764 #endif 1765 MatSetValuesAdifor_MPIAIJ, 1766 /*75*/ 0, 1767 0, 1768 0, 1769 0, 1770 0, 1771 /*80*/ 0, 1772 0, 1773 0, 1774 0, 1775 /*84*/ MatLoad_MPIAIJ, 1776 0, 1777 0, 1778 0, 1779 0, 1780 0, 1781 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1782 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1783 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1784 MatPtAP_Basic, 1785 MatPtAPSymbolic_MPIAIJ, 1786 /*95*/ MatPtAPNumeric_MPIAIJ, 1787 0, 1788 0, 1789 0, 1790 0, 1791 /*100*/0, 1792 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1793 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1794 MatConjugate_MPIAIJ, 1795 0, 1796 /*105*/MatSetValuesRow_MPIAIJ, 1797 MatRealPart_MPIAIJ, 1798 MatImaginaryPart_MPIAIJ}; 1799 1800 /* ----------------------------------------------------------------------------------------*/ 1801 1802 EXTERN_C_BEGIN 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1805 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1806 { 1807 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1812 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 EXTERN_C_END 1816 1817 EXTERN_C_BEGIN 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1820 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1821 { 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 PetscErrorCode ierr; 1824 1825 PetscFunctionBegin; 1826 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1827 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 EXTERN_C_END 1831 1832 #include "petscpc.h" 1833 EXTERN_C_BEGIN 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1836 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1837 { 1838 Mat_MPIAIJ *b; 1839 PetscErrorCode ierr; 1840 PetscInt i; 1841 1842 PetscFunctionBegin; 1843 B->preallocated = PETSC_TRUE; 1844 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1845 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1846 if 
(d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1847 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1848 1849 B->rmap.bs = B->cmap.bs = 1; 1850 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1851 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1852 if (d_nnz) { 1853 for (i=0; i<B->rmap.n; i++) { 1854 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1855 } 1856 } 1857 if (o_nnz) { 1858 for (i=0; i<B->rmap.n; i++) { 1859 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1860 } 1861 } 1862 b = (Mat_MPIAIJ*)B->data; 1863 1864 /* Explicitly create 2 MATSEQAIJ matrices. */ 1865 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1866 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1867 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1868 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1869 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1870 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1871 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1872 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1873 1874 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1875 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1876 1877 PetscFunctionReturn(0); 1878 } 1879 EXTERN_C_END 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1883 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1884 { 1885 Mat mat; 1886 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 *newmat = 0; 1891 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1892 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1893 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1894 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1895 a = (Mat_MPIAIJ*)mat->data; 1896 1897 mat->factor = matin->factor; 1898 mat->rmap.bs = matin->rmap.bs; 1899 mat->assembled = PETSC_TRUE; 1900 mat->insertmode = NOT_SET_VALUES; 1901 mat->preallocated = PETSC_TRUE; 1902 1903 a->size = oldmat->size; 1904 a->rank = oldmat->rank; 1905 a->donotstash = oldmat->donotstash; 1906 a->roworiented = oldmat->roworiented; 1907 a->rowindices = 0; 1908 a->rowvalues = 0; 1909 a->getrowactive = PETSC_FALSE; 1910 1911 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1912 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1913 1914 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1915 if (oldmat->colmap) { 1916 #if defined (PETSC_USE_CTABLE) 1917 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1918 #else 1919 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1920 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1921 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1922 #endif 1923 } else a->colmap = 0; 1924 if (oldmat->garray) { 1925 PetscInt len; 1926 len = oldmat->B->cmap.n; 1927 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1928 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1929 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1930 } else a->garray = 0; 1931 1932 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1933 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1934 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1935 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1936 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1937 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1938 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1939 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1940 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1941 *newmat = mat; 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #include "petscsys.h" 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatLoad_MPIAIJ" 1949 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1950 { 1951 Mat A; 1952 PetscScalar *vals,*svals; 1953 MPI_Comm comm = ((PetscObject)viewer)->comm; 1954 MPI_Status status; 1955 PetscErrorCode ierr; 1956 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1957 PetscInt i,nz,j,rstart,rend,mmax; 1958 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1959 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 1960 PetscInt cend,cstart,n,*rowners; 1961 int fd; 1962 1963 PetscFunctionBegin; 1964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1965 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1966 if (!rank) { 1967 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1968 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1969 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1970 } 1971 1972 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1973 M = header[1]; N = header[2]; 1974 /* determine ownership of all rows */ 1975 m = M/size + ((M % size) > rank); 1976 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1977 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1978 1979 /* First process needs enough room for process with most rows */ 1980 if (!rank) { 1981 mmax = rowners[1]; 1982 for (i=2; i<size; i++) { 1983 mmax = PetscMax(mmax,rowners[i]); 1984 } 1985 } else mmax = m; 1986 1987 rowners[0] = 0; 1988 for (i=2; i<=size; i++) { 1989 mmax = PetscMax(mmax,rowners[i]); 1990 rowners[i] += rowners[i-1]; 1991 } 1992 rstart = rowners[rank]; 1993 rend = rowners[rank+1]; 1994 1995 /* distribute row lengths to all processors */ 1996 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1997 if (!rank) { 1998 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1999 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2000 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2001 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2002 for (j=0; j<m; j++) { 2003 procsnz[0] += ourlens[j]; 2004 } 2005 for (i=1; i<size; i++) { 2006 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2007 /* calculate the number of nonzeros on each processor */ 2008 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2009 procsnz[i] += rowlengths[j]; 2010 } 2011 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2012 } 2013 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2014 } else { 2015 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2016 
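/* Each process now holds its own row lengths in ourlens[].  As a worked
   illustration of the partition rule used above (illustrative numbers, not
   from the source): with M=10 rows on size=3 processes,
   m = M/size + ((M % size) > rank) gives local sizes 4,3,3. */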
} 2017 2018 if (!rank) { 2019 /* determine max buffer needed and allocate it */ 2020 maxnz = 0; 2021 for (i=0; i<size; i++) { 2022 maxnz = PetscMax(maxnz,procsnz[i]); 2023 } 2024 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2025 2026 /* read in my part of the matrix column indices */ 2027 nz = procsnz[0]; 2028 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2029 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2030 2031 /* read in everyone else's part and ship it off */ 2032 for (i=1; i<size; i++) { 2033 nz = procsnz[i]; 2034 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2035 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2036 } 2037 ierr = PetscFree(cols);CHKERRQ(ierr); 2038 } else { 2039 /* determine buffer space needed for message */ 2040 nz = 0; 2041 for (i=0; i<m; i++) { 2042 nz += ourlens[i]; 2043 } 2044 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2045 2046 /* receive message of column indices */ 2047 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2048 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2049 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2050 } 2051 2052 /* determine column ownership if matrix is not square */ 2053 if (N != M) { 2054 n = N/size + ((N % size) > rank); 2055 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2056 cstart = cend - n; 2057 } else { 2058 cstart = rstart; 2059 cend = rend; 2060 n = cend - cstart; 2061 } 2062 2063 /* loop over local rows, determining number of off diagonal entries */ 2064 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2065 jj = 0; 2066 for (i=0; i<m; i++) { 2067 for (j=0; j<ourlens[i]; j++) { 2068 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2069 jj++; 2070 } 2071 } 2072 2073 /* create our matrix */ 2074 for (i=0; i<m; i++) { 2075 ourlens[i] -= offlens[i]; 2076 } 2077 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2078 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2079 ierr = MatSetType(A,type);CHKERRQ(ierr); 2080 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2081 2082 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2083 for (i=0; i<m; i++) { 2084 ourlens[i] += offlens[i]; 2085 } 2086 2087 if (!rank) { 2088 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2089 2090 /* read in my part of the matrix numerical values */ 2091 nz = procsnz[0]; 2092 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2093 2094 /* insert into matrix */ 2095 jj = rstart; 2096 smycols = mycols; 2097 svals = vals; 2098 for (i=0; i<m; i++) { 2099 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2100 smycols += ourlens[i]; 2101 svals += ourlens[i]; 2102 jj++; 2103 } 2104 2105 /* read in the other processors' parts and ship them out */ 2106 for (i=1; i<size; i++) { 2107 nz = procsnz[i]; 2108 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2109 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2110 } 2111 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2112 } else { 2113 /* receive numeric values */ 2114 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2115 2116 /* receive message of values */ 2117 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2118 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2119 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2120 2121
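/* mycols[] holds the column indices of exactly the nz entries of this
   process's rows, in the same CSR order in which the numerical values in
   vals[] arrive, so both are walked together row by row below. */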
/* insert into matrix */ 2122 jj = rstart; 2123 smycols = mycols; 2124 svals = vals; 2125 for (i=0; i<m; i++) { 2126 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2127 smycols += ourlens[i]; 2128 svals += ourlens[i]; 2129 jj++; 2130 } 2131 } 2132 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2133 ierr = PetscFree(vals);CHKERRQ(ierr); 2134 ierr = PetscFree(mycols);CHKERRQ(ierr); 2135 ierr = PetscFree(rowners);CHKERRQ(ierr); 2136 2137 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2138 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2139 *newmat = A; 2140 PetscFunctionReturn(0); 2141 } 2142 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2145 /* 2146 Not great since it makes two copies of the submatrix: first a SeqAIJ matrix 2147 on each process, and then the end result formed by concatenating the local matrices. 2148 Writing it directly would be much like MatGetSubMatrices_MPIAIJ(). 2149 */ 2150 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2151 { 2152 PetscErrorCode ierr; 2153 PetscMPIInt rank,size; 2154 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2155 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2156 Mat *local,M,Mreuse; 2157 PetscScalar *vwork,*aa; 2158 MPI_Comm comm = mat->comm; 2159 Mat_SeqAIJ *aij; 2160 2161 2162 PetscFunctionBegin; 2163 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2164 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2165 2166 if (call == MAT_REUSE_MATRIX) { 2167 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2168 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2169 local = &Mreuse; 2170 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2171 } else { 2172 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2173 Mreuse = *local; 2174 ierr = PetscFree(local);CHKERRQ(ierr); 2175 } 2176 2177 /* 2178 m - number of local rows 2179 n - number of columns (same on all processors) 2180 rstart - first row in new global matrix generated 2181 */ 2182 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2183 if (call == MAT_INITIAL_MATRIX) { 2184 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2185 ii = aij->i; 2186 jj = aij->j; 2187 2188 /* 2189 Determine the number of non-zeros in the diagonal and off-diagonal 2190 portions of the matrix in order to do correct preallocation 2191 */ 2192 2193 /* first get start and end of "diagonal" columns */ 2194 if (csize == PETSC_DECIDE) { 2195 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2196 if (mglobal == n) { /* square matrix */ 2197 nlocal = m; 2198 } else { 2199 nlocal = n/size + ((n % size) > rank); 2200 } 2201 } else { 2202 nlocal = csize; 2203 } 2204 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2205 rstart = rend - nlocal; 2206 if (rank == size - 1 && rend != n) { 2207 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2208 } 2209 2210 /* next, compute all the lengths */ 2211 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2212 olens = dlens + m; 2213 for (i=0; i<m; i++) { 2214 jend = ii[i+1] - ii[i]; 2215 olen = 0; 2216 dlen = 0; 2217 for (j=0; j<jend; j++) { 2218 if (*jj < rstart || *jj >= rend) olen++; 2219 else dlen++; 2220 jj++; 2221 } 2222 olens[i] = olen; 2223 dlens[i] = dlen; 2224 } 2225 ierr =
MatCreate(comm,&M);CHKERRQ(ierr); 2226 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2227 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2228 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2229 ierr = PetscFree(dlens);CHKERRQ(ierr); 2230 } else { 2231 PetscInt ml,nl; 2232 2233 M = *newmat; 2234 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2235 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as the request"); 2236 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2237 /* 2238 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2239 rather than the slower MatSetValues(). 2240 */ 2241 M->was_assembled = PETSC_TRUE; 2242 M->assembled = PETSC_FALSE; 2243 } 2244 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2245 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2246 ii = aij->i; 2247 jj = aij->j; 2248 aa = aij->a; 2249 for (i=0; i<m; i++) { 2250 row = rstart + i; 2251 nz = ii[i+1] - ii[i]; 2252 cwork = jj; jj += nz; 2253 vwork = aa; aa += nz; 2254 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2255 } 2256 2257 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2258 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2259 *newmat = M; 2260 2261 /* save submatrix used in processor for next request */ 2262 if (call == MAT_INITIAL_MATRIX) { 2263 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2264 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2265 } 2266 2267 PetscFunctionReturn(0); 2268 } 2269 2270 EXTERN_C_BEGIN 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2273 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2274 { 2275 PetscInt m,cstart, cend,j,nnz,i,d; 2276 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2277 const PetscInt *JJ; 2278 PetscScalar *values; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 2283 2284 B->rmap.bs = B->cmap.bs = 1; 2285 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2286 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2287 m = B->rmap.n; 2288 cstart = B->cmap.rstart; 2289 cend = B->cmap.rend; 2290 rstart = B->rmap.rstart; 2291 2292 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2293 o_nnz = d_nnz + m; 2294 2295 for (i=0; i<m; i++) { 2296 nnz = Ii[i+1]- Ii[i]; 2297 JJ = J + Ii[i]; 2298 nnz_max = PetscMax(nnz_max,nnz); 2299 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 2300 for (j=0; j<nnz; j++) { 2301 if (*JJ >= cstart) break; 2302 JJ++; 2303 } 2304 d = 0; 2305 for (; j<nnz; j++) { 2306 if (*JJ++ >= cend) break; 2307 d++; 2308 } 2309 d_nnz[i] = d; 2310 o_nnz[i] = nnz - d; 2311 } 2312 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2313 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2314 2315 if (v) values = (PetscScalar*)v; 2316 else { 2317 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2318 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2319 } 2320 2321 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2322 for (i=0; i<m; i++) { 2323 ii = i + rstart; 2324 nnz = Ii[i+1]- Ii[i]; 2325 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2326 } 2327 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2328 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2329 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2330 2331 if (!v) { 2332 ierr = PetscFree(values);CHKERRQ(ierr); 2333 } 2334 PetscFunctionReturn(0); 2335 } 2336 EXTERN_C_END 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2340 /*@ 2341 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2342 (the default parallel PETSc format). 2343 2344 Collective on MPI_Comm 2345 2346 Input Parameters: 2347 + B - the matrix 2348 . i - the indices into j for the start of each local row (starts with zero) 2349 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2350 - v - optional values in the matrix 2351 2352 Level: developer 2353 2354 Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal 2355 storage format. Thus changing the values in v[] after this call will not affect the matrix values. 2356 2357 .keywords: matrix, aij, compressed row, sparse, parallel 2358 2359 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2360 MatCreateSeqAIJWithArrays() 2361 @*/ 2362 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2363 { 2364 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2365 2366 PetscFunctionBegin; 2367 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2368 if (f) { 2369 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2370 } 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2376 /*@C 2377 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2378 (the default parallel PETSc format). For good matrix assembly performance 2379 the user should preallocate the matrix storage by setting the parameters 2380 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2381 performance can be increased by more than a factor of 50. 2382 2383 Collective on MPI_Comm 2384 2385 Input Parameters: 2386 + A - the matrix 2387 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2388 (same value is used for all local rows) 2389 . d_nnz - array containing the number of nonzeros in the various rows of the 2390 DIAGONAL portion of the local submatrix (possibly different for each row) 2391 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2392 The size of this array is equal to the number of local rows, i.e. 'm'. 2393 You must leave room for the diagonal entry even if it is zero. 2394 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2395 submatrix (same value is used for all local rows). 2396 - o_nnz - array containing the number of nonzeros in the various rows of the 2397 OFF-DIAGONAL portion of the local submatrix (possibly different for 2398 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2399 structure. The size of this array is equal to the number 2400 of local rows, i.e. 'm'.
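   A minimal call sketch (the variable names here are illustrative only); the
   type and sizes must be set first, and in this sketch every local row is
   assumed to need at most 5 nonzeros in its DIAGONAL block and 2 in its
   OFF-DIAGONAL block:
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve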
2401 2402 If the *_nnz parameter is given then the *_nz parameter is ignored. 2403 2404 The AIJ format (also called the Yale sparse matrix format or 2405 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2406 storage. The stored row and column indices begin with zero. See the users manual for details. 2407 2408 The parallel matrix is partitioned such that the first m0 rows belong to 2409 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2410 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2411 2412 The DIAGONAL portion of the local submatrix of a processor can be defined 2413 as the submatrix which is obtained by extracting the part corresponding 2414 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2415 first row that belongs to the processor, and r2 is the last row belonging 2416 to this processor. This is a square mxm matrix. The remaining portion 2417 of the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion. 2418 2419 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2420 2421 Example usage: 2422 2423 Consider the following 8x8 matrix with 34 non-zero values, that is 2424 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2425 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2426 as follows: 2427 2428 .vb 2429 1 2 0 | 0 3 0 | 0 4 2430 Proc0 0 5 6 | 7 0 0 | 8 0 2431 9 0 10 | 11 0 0 | 12 0 2432 ------------------------------------- 2433 13 0 14 | 15 16 17 | 0 0 2434 Proc1 0 18 0 | 19 20 21 | 0 0 2435 0 0 0 | 22 23 0 | 24 0 2436 ------------------------------------- 2437 Proc2 25 26 27 | 0 0 28 | 29 0 2438 30 0 0 | 31 32 33 | 0 34 2439 .ve 2440 2441 This can be represented as a collection of submatrices as: 2442 2443 .vb 2444 A B C 2445 D E F 2446 G H I 2447 .ve 2448 2449 Here the submatrices A,B,C are owned by proc0, D,E,F are 2450 owned by proc1, and G,H,I are owned by proc2. 2451 2452 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2453 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2454 The 'M','N' parameters are 8,8, and have the same values on all procs. 2455 2456 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2457 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2458 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2459 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2460 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2461 matrix, and [DF] as another SeqAIJ matrix. 2462 2463 When d_nz, o_nz parameters are specified, d_nz storage elements are 2464 allocated for every row of the local diagonal submatrix, and o_nz 2465 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2466 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2467 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2468 In this case, the values of d_nz,o_nz are: 2469 .vb 2470 proc0 : d_nz = 2, o_nz = 2 2471 proc1 : d_nz = 3, o_nz = 2 2472 proc2 : d_nz = 1, o_nz = 4 2473 .ve 2474 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2475 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2476 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2477 34 values. 2478 2479 When d_nnz, o_nnz parameters are specified, the storage is specified 2480 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2481 In the above case the values for d_nnz,o_nnz are: 2482 .vb 2483 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2484 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2485 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2486 .ve 2487 Here the space allocated is the sum of all the above values, i.e. 34, and 2488 hence preallocation is perfect. 2489 2490 Level: intermediate 2491 2492 .keywords: matrix, aij, compressed row, sparse, parallel 2493 2494 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2495 MPIAIJ 2496 @*/ 2497 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2498 { 2499 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2500 2501 PetscFunctionBegin; 2502 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2503 if (f) { 2504 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2505 } 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2511 /*@C 2512 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain 2513 the local rows in standard CSR format. 2514 2515 Collective on MPI_Comm 2516 2517 Input Parameters: 2518 + comm - MPI communicator 2519 . m - number of local rows (cannot be PETSC_DECIDE) 2520 . n - This value should be the same as the local size used in creating the 2521 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 2522 it calculated if N is given). For square matrices n is almost always m. 2523 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2524 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2525 . i - row indices 2526 . j - column indices 2527 - a - matrix values 2528 2529 Output Parameter: 2530 . mat - the matrix 2531 Level: intermediate 2532 2533 Notes: 2534 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2535 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2536 called this routine. 2537 2538 The i and j indices are 0 based. 2539 2540 .keywords: matrix, aij, compressed row, sparse, parallel 2541 2542 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2543 MPIAIJ, MatCreateMPIAIJ() 2544 @*/ 2545 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2546 { 2547 PetscErrorCode ierr; 2548 2549 PetscFunctionBegin; 2550 if (i[0]) { 2551 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2552 } 2553 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative"); 2554 ierr = MatCreate(comm,mat);CHKERRQ(ierr); ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); /* sizes must be set before the type; m,n,M,N were previously unused here */ 2555 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2556 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatCreateMPIAIJ" 2562 /*@C 2563 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2564 (the default parallel PETSc format). For good matrix assembly performance 2565 the user should preallocate the matrix storage by setting the parameters 2566 d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 2567 performance can be increased by more than a factor of 50. 2568 2569 Collective on MPI_Comm 2570 2571 Input Parameters: 2572 + comm - MPI communicator 2573 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2574 This value should be the same as the local size used in creating the 2575 y vector for the matrix-vector product y = Ax. 2576 . n - This value should be the same as the local size used in creating the 2577 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 2578 it calculated if N is given). For square matrices n is almost always m. 2579 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2580 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2581 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2582 (same value is used for all local rows) 2583 . d_nnz - array containing the number of nonzeros in the various rows of the 2584 DIAGONAL portion of the local submatrix (possibly different for each row) 2585 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2586 The size of this array is equal to the number of local rows, i.e. 'm'. 2587 You must leave room for the diagonal entry even if it is zero. 2588 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2589 submatrix (same value is used for all local rows). 2590 - o_nnz - array containing the number of nonzeros in the various rows of the 2591 OFF-DIAGONAL portion of the local submatrix (possibly different for 2592 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2593 structure. The size of this array is equal to the number 2594 of local rows, i.e. 'm'. 2595 2596 Output Parameter: 2597 . A - the matrix 2598 2599 Notes: 2600 If the *_nnz parameter is given then the *_nz parameter is ignored. 2601 2602 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2603 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2604 storage requirements for this matrix. 2605 2606 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2607 processor then it must be used on all processors that share the object for 2608 that argument. 2609 2610 The user MUST specify either the local or global matrix dimensions 2611 (possibly both). 2612 2613 The parallel matrix is partitioned such that the first m0 rows belong to 2614 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2615 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2616 2617 The DIAGONAL portion of the local submatrix of a processor can be defined 2618 as the submatrix which is obtained by extracting the part corresponding 2619 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2620 first row that belongs to the processor, and r2 is the last row belonging 2621 to this processor. This is a square mxm matrix. The remaining portion 2622 of the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion. 2623 2624 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2625 2626 When calling this routine with a single process communicator, a matrix of 2627 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2628 type of communicator, use the construction mechanism: 2629 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2630 2631 By default, this format uses inodes (identical nodes) when possible.
2632 We search for consecutive rows with the same nonzero structure, thereby 2633 reusing matrix information to achieve increased efficiency. 2634 2635 Options Database Keys: 2636 + -mat_no_inode - Do not use inodes 2637 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2638 - -mat_aij_oneindex - Internally use indexing starting at 1 2639 rather than 0. Note that when calling MatSetValues(), 2640 the user still MUST index entries starting at 0! 2641 2642 2643 Example usage: 2644 2645 Consider the following 8x8 matrix with 34 non-zero values, that is 2646 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2647 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2648 as follows: 2649 2650 .vb 2651 1 2 0 | 0 3 0 | 0 4 2652 Proc0 0 5 6 | 7 0 0 | 8 0 2653 9 0 10 | 11 0 0 | 12 0 2654 ------------------------------------- 2655 13 0 14 | 15 16 17 | 0 0 2656 Proc1 0 18 0 | 19 20 21 | 0 0 2657 0 0 0 | 22 23 0 | 24 0 2658 ------------------------------------- 2659 Proc2 25 26 27 | 0 0 28 | 29 0 2660 30 0 0 | 31 32 33 | 0 34 2661 .ve 2662 2663 This can be represented as a collection of submatrices as: 2664 2665 .vb 2666 A B C 2667 D E F 2668 G H I 2669 .ve 2670 2671 Here the submatrices A,B,C are owned by proc0, D,E,F are 2672 owned by proc1, and G,H,I are owned by proc2. 2673 2674 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2675 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2676 The 'M','N' parameters are 8,8, and have the same values on all procs. 2677 2678 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2679 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2680 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2681 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2682 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2683 matrix, and [DF] as another SeqAIJ matrix. 2684 2685 When d_nz, o_nz parameters are specified, d_nz storage elements are 2686 allocated for every row of the local diagonal submatrix, and o_nz 2687 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2688 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2689 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2690 In this case, the values of d_nz,o_nz are: 2691 .vb 2692 proc0 : d_nz = 2, o_nz = 2 2693 proc1 : d_nz = 3, o_nz = 2 2694 proc2 : d_nz = 1, o_nz = 4 2695 .ve 2696 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2697 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2698 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2699 34 values. 2700 2701 When d_nnz, o_nnz parameters are specified, the storage is specified 2702 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2703 In the above case the values for d_nnz,o_nnz are: 2704 .vb 2705 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2706 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2707 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2708 .ve 2709 Here the space allocated is the sum of all the above values, i.e. 34, and 2710 hence preallocation is perfect.
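   A minimal creation sketch for the example above, as issued on proc0 (a
   sketch only; each of the other processes makes the same call with its own
   local sizes and count arrays):
.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve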
2711 2712 Level: intermediate 2713 2714 .keywords: matrix, aij, compressed row, sparse, parallel 2715 2716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2717 MPIAIJ, MatCreateMPIAIJWithArrays() 2718 @*/ 2719 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2720 { 2721 PetscErrorCode ierr; 2722 PetscMPIInt size; 2723 2724 PetscFunctionBegin; 2725 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2726 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2727 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2728 if (size > 1) { 2729 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2730 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2731 } else { 2732 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2733 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2734 } 2735 PetscFunctionReturn(0); 2736 } 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2740 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2741 { 2742 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2743 2744 PetscFunctionBegin; 2745 *Ad = a->A; 2746 *Ao = a->B; 2747 *colmap = a->garray; 2748 PetscFunctionReturn(0); 2749 } 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2753 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2754 { 2755 PetscErrorCode ierr; 2756 PetscInt i; 2757 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2758 2759 PetscFunctionBegin; 2760 if (coloring->ctype == IS_COLORING_LOCAL) { 2761 ISColoringValue *allcolors,*colors; 2762 ISColoring ocoloring; 2763 2764 /* set coloring for diagonal portion */ 2765 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2766 2767 /* set coloring for off-diagonal portion */ 2768 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2769 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2770 for (i=0; i<a->B->cmap.n; i++) { 2771 colors[i] = allcolors[a->garray[i]]; 2772 } 2773 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2774 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2775 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2776 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2777 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2778 ISColoringValue *colors; 2779 PetscInt *larray; 2780 ISColoring ocoloring; 2781 2782 /* set coloring for diagonal portion */ 2783 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2784 for (i=0; i<a->A->cmap.n; i++) { 2785 larray[i] = i + A->cmap.rstart; 2786 } 2787 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2788 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2789 for (i=0; i<a->A->cmap.n; i++) { 2790 colors[i] = coloring->colors[larray[i]]; 2791 } 2792 ierr = PetscFree(larray);CHKERRQ(ierr); 2793 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2794 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2795 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2796 2797 /* set coloring for off-diagonal portion */ 2798 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2799 ierr = 
ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2800 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2801 for (i=0; i<a->B->cmap.n; i++) { 2802 colors[i] = coloring->colors[larray[i]]; 2803 } 2804 ierr = PetscFree(larray);CHKERRQ(ierr); 2805 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2806 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2807 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2808 } else { 2809 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 2810 } 2811 2812 PetscFunctionReturn(0); 2813 } 2814 2815 #if defined(PETSC_HAVE_ADIC) 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2818 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2819 { 2820 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2821 PetscErrorCode ierr; 2822 2823 PetscFunctionBegin; 2824 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2825 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2826 PetscFunctionReturn(0); 2827 } 2828 #endif 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2832 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2833 { 2834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2835 PetscErrorCode ierr; 2836 2837 PetscFunctionBegin; 2838 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2839 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatMerge" 2845 /*@C 2846 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2847 matrices from each processor 2848 2849 Collective on MPI_Comm 2850 2851 Input Parameters: 2852 + comm - the communicator the parallel matrix will live on 2853 . inmat - the input sequential matrix (one per process) 2854 . n - number of local columns (or PETSC_DECIDE) 2855 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2856 2857 Output Parameter: 2858 . outmat - the parallel matrix generated 2859 2860 Level: advanced 2861 2862 Notes: The number of columns of the matrix in EACH processor MUST be the same.
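   Example usage (a sketch; each process is assumed to have already built and
   assembled its own sequential matrix seqA):
.vb
      Mat seqA,mpiA;
      /* ... assemble the local MATSEQAIJ matrix seqA on each process ... */
      ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
      /* seqA is destroyed inside MatMerge(); only mpiA remains */
.ve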
2863 2864 @*/ 2865 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2866 { 2867 PetscErrorCode ierr; 2868 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 2869 PetscInt *indx; 2870 PetscScalar *values; 2871 2872 PetscFunctionBegin; 2873 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2874 if (scall == MAT_INITIAL_MATRIX){ 2875 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2876 if (n == PETSC_DECIDE){ 2877 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2878 } 2879 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2880 rstart -= m; 2881 2882 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2883 for (i=0;i<m;i++) { 2884 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2885 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2886 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2887 } 2888 /* This routine will ONLY return MPIAIJ type matrix */ 2889 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2890 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2891 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2892 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2893 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2894 2895 } else if (scall == MAT_REUSE_MATRIX){ 2896 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2897 } else { 2898 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2899 } 2900 2901 for (i=0;i<m;i++) { 2902 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2903 Ii = i + rstart; 2904 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2905 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2906 } 2907 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2908 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2909 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2910 2911 PetscFunctionReturn(0); 2912 } 2913 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "MatFileSplit" 2916 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2917 { 2918 PetscErrorCode ierr; 2919 PetscMPIInt rank; 2920 PetscInt m,N,i,rstart,nnz; 2921 size_t len; 2922 const PetscInt *indx; 2923 PetscViewer out; 2924 char *name; 2925 Mat B; 2926 const PetscScalar *values; 2927 2928 PetscFunctionBegin; 2929 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2930 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2931 /* Should this be the type of the diagonal block of A? 
*/ 2932 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2933 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2934 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2935 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2936 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2937 for (i=0;i<m;i++) { 2938 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2939 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2940 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2941 } 2942 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 2945 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2946 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2947 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2948 sprintf(name,"%s.%d",outfile,rank); 2949 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2950 ierr = PetscFree(name);CHKERRQ(ierr); 2951 ierr = MatView(B,out);CHKERRQ(ierr); 2952 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2953 ierr = MatDestroy(B);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2960 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2961 { 2962 PetscErrorCode ierr; 2963 Mat_Merge_SeqsToMPI *merge = PETSC_NULL; 2964 PetscObjectContainer container; 2965 2966 PetscFunctionBegin; 2967 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2968 if (container) { 2969 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2970 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2971 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2972 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2973 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2974 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2975 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2976 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2977 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 2978 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 2979 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 2980 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 2981 2982 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2983 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2984 } 2985 if (merge) {ierr = PetscFree(merge);CHKERRQ(ierr);} /* merge is only valid when the container was found */ 2986 2987 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #include "src/mat/utils/freespace.h" 2992 #include "petscbt.h" 2993 static PetscEvent logkey_seqstompinum = 0; 2994 #undef __FUNCT__ 2995 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2996 /*@C 2997 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 2998 matrices from each processor 2999 3000 Collective on MPI_Comm 3001 3002 Input Parameters: 3003 + comm - the communicator the parallel matrix will live on 3004 . seqmat - the input sequential matrix (one per process) 3005 . m - number of local rows (or PETSC_DECIDE) 3006 . n - number of local columns (or PETSC_DECIDE) 3007 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3008 3009 Output Parameter: 3010 . mpimat - the parallel matrix generated 3011 3012 Level: advanced 3013 3014 Notes: 3015 The dimensions of the sequential matrix in each processor MUST be the same.
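   A typical call sequence (sketch): the symbolic routine creates mpimat and
   the numeric routine then fills in the values, i.e.
.vb
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve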
3016 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 3017 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3018 @*/ 3019 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3020 { 3021 PetscErrorCode ierr; 3022 MPI_Comm comm=mpimat->comm; 3023 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3024 PetscMPIInt size,rank,taga,*len_s; 3025 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3026 PetscInt proc,m; 3027 PetscInt **buf_ri,**buf_rj; 3028 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3029 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3030 MPI_Request *s_waits,*r_waits; 3031 MPI_Status *status; 3032 MatScalar *aa=a->a,**abuf_r,*ba_i; 3033 Mat_Merge_SeqsToMPI *merge; 3034 PetscObjectContainer container; 3035 3036 PetscFunctionBegin; 3037 if (!logkey_seqstompinum) { 3038 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr); 3039 } 3040 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3041 3042 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3043 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3044 3045 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3046 if (container) { 3047 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3048 } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created with MatMerge_SeqsToMPISymbolic()"); 3049 bi = merge->bi; 3050 bj = merge->bj; 3051 buf_ri = merge->buf_ri; 3052 buf_rj = merge->buf_rj; 3053 3054 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3055 owners = merge->rowmap.range; 3056 len_s = merge->len_s; 3057 3058 /* send and recv matrix values */ 3059 /*-----------------------------*/ 3060 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3061 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3062 3063 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3064 for (proc=0,k=0; proc<size; proc++){ 3065 if (!len_s[proc]) continue; 3066 i = owners[proc]; 3067 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3068 k++; 3069 } 3070 3071 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3072 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3073 ierr = PetscFree(status);CHKERRQ(ierr); 3074 3075 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3076 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3077 3078 /* insert mat values of mpimat */ 3079 /*----------------------------*/ 3080 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3081 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3082 nextrow = buf_ri_k + merge->nrecv; 3083 nextai = nextrow + merge->nrecv; 3084 3085 for (k=0; k<merge->nrecv; k++){ 3086 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3087 nrows = *(buf_ri_k[k]); 3088 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 3089 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3090 } 3091 3092 /* set values of ba */ 3093 m = merge->rowmap.n; 3094 for (i=0; i<m; i++) { 3095 arow = owners[rank] + i; 3096 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3097 bnzi = bi[i+1] - bi[i]; 3098 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3099 3100 /* add local non-zero vals of this proc's seqmat into ba */ 3101 anzi =
    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
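/*
   Calling-pattern sketch (illustration only, not library code): the symbolic
   phase below is run once to determine the parallel nonzero structure; the
   numeric phase above can then be repeated each time the values, but not the
   pattern, of the local seqmat change.

     Mat C;
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);CHKERRQ(ierr);
     while (the values of seqmat change) {
       ierr = MatMerge_SeqsToMPINumeric(seqmat,C);CHKERRQ(ierr);
     }

   This is exactly the split that MatMerge_SeqsToMPI() performs, driven by the
   MAT_INITIAL_MATRIX/MAT_REUSE_MATRIX flag.
*/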
static PetscEvent logkey_seqstompisym = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (!logkey_seqstompisym) {
    ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  merge->rowmap.n  = m;
  merge->rowmap.N  = M;
  merge->rowmap.bs = 1;
  ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap.n;
  M      = merge->rowmap.N;
  owners = merge->rowmap.range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving processor)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
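  /*
     Layout illustration with hypothetical numbers (not from the source): if the
     rows owned by [proc] include three nonempty rows with local indices 0, 2, 5
     holding 4, 1 and 3 nonzeros respectively, the outgoing i-structure message is

         buf_si = [3,  0, 2, 5,  0, 4, 5, 8]
                   ^   ^------^  ^---------^
               nrows  local row  running sum of row lengths
                      indices    (the i-structure)

     so len_si[proc] = 2*(nrows+1) = 8 entries, matching the count formed above.
  */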
  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
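/*
   Usage sketch (illustration only): every process supplies a SeqAIJ matrix of
   the same global dimensions; the result is their sum, stored as a parallel
   MPIAIJ matrix.  The assembly of seqmat is assumed to happen elsewhere.

     Mat seqmat,C;
     ... each process assembles seqmat as a MATSEQAIJ of size M x N ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... later, after seqmat's values (not its pattern) change ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/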
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of an MPIAIJ matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns below the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns below the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A, columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
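/*
   Usage sketch (illustration only): gather this process's rows of a parallel
   MPIAIJ matrix A into one sequential matrix carrying global column indices.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... A's values change, same nonzero pattern ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/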
static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of an MPIAIJ matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap.rstart; end = A->rmap.rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
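/*
   Column-set illustration with hypothetical numbers (not from the source):
   suppose cstart = 4, the diagonal block owns 4 local columns, and the
   off-diagonal map is garray = {1,3,9}.  The condensed column set built
   above is then

       idx = {1, 3,  4, 5, 6, 7,  9}

   i.e. the off-diagonal columns below the owned range, followed by the owned
   columns, followed by the off-diagonal columns above it, in increasing order.
*/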
static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which the next B->rmap.n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
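/*
   Usage sketch (illustration only): the index sets created on the first call
   can be handed back for a MAT_REUSE_MATRIX call once B's values change.

     IS       isr = PETSC_NULL,isc = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&isr,&isc,&brstart,&B_seq);CHKERRQ(ierr);
     ... B's values change, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&isr,&isc,&brstart,&B_seq);CHKERRQ(ierr);
*/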
static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  rstarts = gen_from->starts;
  sstarts = gen_to->starts;
  rprocs  = gen_from->procs;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj = sstartsj + nsends + 1;
    sstartsj[0] = 0;  rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row  = srow[k] + B->rmap.range[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* nothing is saved for reuse in this case */
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
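/*
   Usage sketch (illustration only): peek at the scatter machinery of an
   assembled MPIAIJ matrix, e.g. from inside another matrix kernel.

     Vec        lvec;
     VecScatter scat;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
     ... lvec receives the off-process entries during MatMult(A,x,y) ...
*/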
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor     = 0;
  B->rmap.bs    = 1;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat        mat  = *mmat;
  PetscInt   m    = *mm, n = *mn;
  InsertMode addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt    i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt    cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END