1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 141 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->m; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 246 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 305 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscVerboseInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 358 b = (Mat_SeqAIJ *)aij->B->data; 359 360 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 361 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 362 } 363 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 364 b->compressedrow.use = PETSC_TRUE; 365 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 366 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 367 368 if (aij->rowvalues) { 369 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 370 aij->rowvalues = 0; 371 } 372 373 /* used by MatAXPY() */ 374 a->xtoy = 0; b->xtoy = 0; 375 a->XtoY = 0; b->XtoY = 0; 376 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 382 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 383 { 384 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 385 PetscErrorCode ierr; 386 387 PetscFunctionBegin; 388 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 389 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatZeroRows_MPIAIJ" 395 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 396 { 397 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 398 PetscErrorCode ierr; 399 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 400 PetscInt i,*owners = l->rowners; 401 PetscInt *nprocs,j,idx,nsends,row; 402 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 403 PetscInt *rvalues,count,base,slen,*source; 404 PetscInt *lens,*lrows,*values,rstart=l->rstart; 405 MPI_Comm comm = A->comm; 406 MPI_Request *send_waits,*recv_waits; 407 MPI_Status recv_status,*send_status; 408 #if defined(PETSC_DEBUG) 409 PetscTruth found = PETSC_FALSE; 410 #endif 411 412 PetscFunctionBegin; 413 /* first count number of contributors to each processor */ 414 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 415 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 416 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 417 j = 0; 418 for (i=0; i<N; i++) { 419 if (lastidx > (idx = rows[i])) j = 0; 420 lastidx = idx; 421 for (; j<size; j++) { 422 if (idx >= owners[j] && idx < owners[j+1]) { 423 nprocs[2*j]++; 424 nprocs[2*j+1] = 1; 425 owner[i] = j; 426 #if defined(PETSC_DEBUG) 427 found = PETSC_TRUE; 428 #endif 429 break; 430 } 431 } 432 #if defined(PETSC_DEBUG) 433 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 434 found = PETSC_FALSE; 435 #endif 436 } 437 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 438 439 /* inform other processors of number of messages and max length*/ 440 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 441 442 /* post receives: */ 443 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 444 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 445 for (i=0; i<nrecvs; i++) { 446 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 447 } 448 449 /* do sends: 450 1) starts[i] gives the starting index in svalues for stuff going to 451 the ith processor 452 */ 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 454 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 455 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 456 starts[0] = 0; 457 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 458 for (i=0; i<N; i++) { 459 svalues[starts[owner[i]]++] = rows[i]; 460 } 461 462 starts[0] = 0; 463 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 464 count = 0; 465 for (i=0; i<size; i++) { 466 if (nprocs[2*i+1]) { 467 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 468 } 469 } 470 ierr = PetscFree(starts);CHKERRQ(ierr); 471 472 base = owners[rank]; 473 474 /* wait on receives */ 475 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 476 source = lens + nrecvs; 477 count = nrecvs; slen = 0; 478 while (count) { 479 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 480 /* unpack receives into our local space */ 481 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 482 source[imdex] = recv_status.MPI_SOURCE; 483 lens[imdex] = n; 484 slen += n; 485 count--; 486 } 487 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 488 489 /* move the data into the send scatter */ 490 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 491 count = 0; 492 for (i=0; i<nrecvs; i++) { 493 values = rvalues + i*nmax; 494 for (j=0; j<lens[i]; j++) { 495 lrows[count++] = values[j] - base; 496 } 497 } 498 ierr = PetscFree(rvalues);CHKERRQ(ierr); 499 ierr = PetscFree(lens);CHKERRQ(ierr); 500 ierr = PetscFree(owner);CHKERRQ(ierr); 501 ierr = PetscFree(nprocs);CHKERRQ(ierr); 502 503 /* actually zap the local rows */ 504 /* 505 Zero the required rows. If the "diagonal block" of the matrix 506 is square and the user wishes to set the diagonal we use separate 507 code so that MatSetValues() is not called for each diagonal allocating 508 new memory, thus calling lots of mallocs and slowing things down. 509 510 Contributed by: Matthew Knepley 511 */ 512 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 513 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 514 if ((diag != 0.0) && (l->A->M == l->A->N)) { 515 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 516 } else if (diag != 0.0) { 517 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 518 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 519 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 520 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 521 } 522 for (i = 0; i < slen; i++) { 523 row = lrows[i] + rstart; 524 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 525 } 526 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 } else { 529 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 530 } 531 ierr = PetscFree(lrows);CHKERRQ(ierr); 532 533 /* wait on sends */ 534 if (nsends) { 535 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 536 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 537 ierr = PetscFree(send_status);CHKERRQ(ierr); 538 } 539 ierr = PetscFree(send_waits);CHKERRQ(ierr); 540 ierr = PetscFree(svalues);CHKERRQ(ierr); 541 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMult_MPIAIJ" 547 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 548 { 549 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 550 PetscErrorCode ierr; 551 PetscInt nt; 552 553 PetscFunctionBegin; 554 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 555 if (nt != A->n) { 556 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 557 } 558 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 559 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 560 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 561 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatMultAdd_MPIAIJ" 567 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 568 { 569 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 574 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 575 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 582 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 583 { 584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 585 PetscErrorCode ierr; 586 PetscTruth merged; 587 588 PetscFunctionBegin; 589 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 590 /* do nondiagonal part */ 591 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 592 if (!merged) { 593 /* send it on its way */ 594 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 /* do local part */ 596 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 597 /* receive remote parts: note this assumes the values are not actually */ 598 /* added in yy until the next line, */ 599 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 } else { 601 /* do local part */ 602 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 603 /* send it on its way */ 604 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 605 /* values actually were received in the Begin() but we need to call this nop */ 606 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 EXTERN_C_BEGIN 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 614 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 615 { 616 MPI_Comm comm; 617 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 618 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 619 IS Me,Notme; 620 PetscErrorCode ierr; 621 PetscInt M,N,first,last,*notme,i; 622 PetscMPIInt size; 623 624 PetscFunctionBegin; 625 626 /* Easy test: symmetric diagonal block */ 627 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 628 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 629 if (!*f) PetscFunctionReturn(0); 630 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 632 if (size == 1) PetscFunctionReturn(0); 633 634 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 635 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 636 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 637 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 638 for (i=0; i<first; i++) notme[i] = i; 639 for (i=last; i<M; i++) notme[i-last+first] = i; 640 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 641 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 642 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 643 Aoff = Aoffs[0]; 644 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 645 Boff = Boffs[0]; 646 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 647 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 648 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 649 ierr = ISDestroy(Me);CHKERRQ(ierr); 650 ierr = ISDestroy(Notme);CHKERRQ(ierr); 651 652 PetscFunctionReturn(0); 653 } 654 EXTERN_C_END 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 658 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 659 { 660 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 661 PetscErrorCode ierr; 662 663 PetscFunctionBegin; 664 /* do nondiagonal part */ 665 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 666 /* send it on its way */ 667 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 /* do local part */ 669 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 670 /* receive remote parts */ 671 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This only works correctly for square matrices where the subblock A->A is the 677 diagonal block 678 */ 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 681 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 682 { 683 PetscErrorCode ierr; 684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 685 686 PetscFunctionBegin; 687 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 688 if (a->rstart != a->cstart || a->rend != a->cend) { 689 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 690 } 691 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatScale_MPIAIJ" 697 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 698 { 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 PetscErrorCode ierr; 701 702 PetscFunctionBegin; 703 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 704 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatDestroy_MPIAIJ" 710 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 #if defined(PETSC_USE_LOG) 717 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 718 #endif 719 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 720 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 721 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 722 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 723 #if defined (PETSC_USE_CTABLE) 724 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 725 #else 726 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 727 #endif 728 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 729 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 730 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 731 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 732 ierr = PetscFree(aij);CHKERRQ(ierr); 733 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 738 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 739 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 PetscErrorCode ierr; 752 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 753 int fd; 754 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 755 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 756 PetscScalar *column_values; 757 758 PetscFunctionBegin; 759 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 760 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 761 nz = A->nz + B->nz; 762 if (!rank) { 763 header[0] = MAT_FILE_COOKIE; 764 header[1] = mat->M; 765 header[2] = mat->N; 766 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 767 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 768 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 769 /* get largest number of rows any processor has */ 770 rlen = mat->m; 771 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 772 for (i=1; i<size; i++) { 773 rlen = PetscMax(rlen,range[i+1] - range[i]); 774 } 775 } else { 776 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 777 rlen = mat->m; 778 } 779 780 /* load up the local row counts */ 781 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 782 for (i=0; i<mat->m; i++) { 783 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 784 } 785 786 /* store the row lengths to the file */ 787 if (!rank) { 788 MPI_Status status; 789 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 for (i=1; i<size; i++) { 791 rlen = range[i+1] - range[i]; 792 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 793 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 794 } 795 } else { 796 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 797 } 798 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 799 800 /* load up the local column indices */ 801 nzmax = nz; /* )th processor needs space a largest processor needs */ 802 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 803 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 804 cnt = 0; 805 for (i=0; i<mat->m; i++) { 806 for (j=B->i[i]; j<B->i[i+1]; j++) { 807 if ( (col = garray[B->j[j]]) > cstart) break; 808 column_indices[cnt++] = col; 809 } 810 for (k=A->i[i]; k<A->i[i+1]; k++) { 811 column_indices[cnt++] = A->j[k] + cstart; 812 } 813 for (; j<B->i[i+1]; j++) { 814 column_indices[cnt++] = garray[B->j[j]]; 815 } 816 } 817 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 818 819 /* store the column indices to the file */ 820 if (!rank) { 821 MPI_Status status; 822 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 823 for (i=1; i<size; i++) { 824 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 825 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 826 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 827 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 828 } 829 } else { 830 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 831 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 832 } 833 ierr = PetscFree(column_indices);CHKERRQ(ierr); 834 835 /* load up the local column values */ 836 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 837 cnt = 0; 838 for (i=0; i<mat->m; i++) { 839 for (j=B->i[i]; j<B->i[i+1]; j++) { 840 if ( garray[B->j[j]] > cstart) break; 841 column_values[cnt++] = B->a[j]; 842 } 843 for (k=A->i[i]; k<A->i[i+1]; k++) { 844 column_values[cnt++] = A->a[k]; 845 } 846 for (; j<B->i[i+1]; j++) { 847 column_values[cnt++] = B->a[j]; 848 } 849 } 850 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 851 852 /* store the column values to the file */ 853 if (!rank) { 854 MPI_Status status; 855 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 for (i=1; i<size; i++) { 857 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 858 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 859 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 860 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 861 } 862 } else { 863 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 864 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 865 } 866 ierr = PetscFree(column_values);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 872 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 873 { 874 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 875 PetscErrorCode ierr; 876 PetscMPIInt rank = aij->rank,size = aij->size; 877 PetscTruth isdraw,iascii,isbinary; 878 PetscViewer sviewer; 879 PetscViewerFormat format; 880 881 PetscFunctionBegin; 882 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 883 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 884 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 885 if (iascii) { 886 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 887 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 888 MatInfo info; 889 PetscTruth inodes; 890 891 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 892 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 893 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 894 if (!inodes) { 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 896 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 897 } else { 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 899 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 900 } 901 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 903 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 904 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 905 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 906 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } else if (format == PETSC_VIEWER_ASCII_INFO) { 909 PetscInt inodecount,inodelimit,*inodes; 910 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 911 if (inodes) { 912 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 913 } else { 914 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 915 } 916 PetscFunctionReturn(0); 917 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 918 PetscFunctionReturn(0); 919 } 920 } else if (isbinary) { 921 if (size == 1) { 922 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 923 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 924 } else { 925 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 926 } 927 PetscFunctionReturn(0); 928 } else if (isdraw) { 929 PetscDraw draw; 930 PetscTruth isnull; 931 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 932 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 933 } 934 935 if (size == 1) { 936 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 937 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 938 } else { 939 /* assemble the entire matrix onto first processor. */ 940 Mat A; 941 Mat_SeqAIJ *Aloc; 942 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 943 PetscScalar *a; 944 945 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 946 if (!rank) { 947 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 948 } else { 949 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 950 } 951 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 952 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 953 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 954 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 955 956 /* copy over the A part */ 957 Aloc = (Mat_SeqAIJ*)aij->A->data; 958 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 959 row = aij->rstart; 960 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 961 for (i=0; i<m; i++) { 962 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 963 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 964 } 965 aj = Aloc->j; 966 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 967 968 /* copy over the B part */ 969 Aloc = (Mat_SeqAIJ*)aij->B->data; 970 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 971 row = aij->rstart; 972 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 973 ct = cols; 974 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 975 for (i=0; i<m; i++) { 976 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 977 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 978 } 979 ierr = PetscFree(ct);CHKERRQ(ierr); 980 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 982 /* 983 Everyone has to call to draw the matrix since the graphics waits are 984 synchronized across all processors that share the PetscDraw object 985 */ 986 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 if (!rank) { 988 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 989 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 990 } 991 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 992 ierr = MatDestroy(A);CHKERRQ(ierr); 993 } 994 PetscFunctionReturn(0); 995 } 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatView_MPIAIJ" 999 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1000 { 1001 PetscErrorCode ierr; 1002 PetscTruth iascii,isdraw,issocket,isbinary; 1003 1004 PetscFunctionBegin; 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1006 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1007 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1008 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1009 if (iascii || isdraw || isbinary || issocket) { 1010 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1011 } else { 1012 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatRelax_MPIAIJ" 1021 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1022 { 1023 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1024 PetscErrorCode ierr; 1025 Vec bb1; 1026 1027 PetscFunctionBegin; 1028 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1029 1030 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1031 1032 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1035 its--; 1036 } 1037 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1051 if (flag & SOR_ZERO_INITIAL_GUESS) { 1052 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1053 its--; 1054 } 1055 while (its--) { 1056 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1057 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 1059 /* update rhs: bb1 = bb - B*x */ 1060 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1061 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1062 1063 /* local sweep */ 1064 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1065 CHKERRQ(ierr); 1066 } 1067 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 if (flag & SOR_ZERO_INITIAL_GUESS) { 1069 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1070 its--; 1071 } 1072 while (its--) { 1073 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1074 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1075 1076 /* update rhs: bb1 = bb - B*x */ 1077 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1078 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1079 1080 /* local sweep */ 1081 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1082 CHKERRQ(ierr); 1083 } 1084 } else { 1085 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1086 } 1087 1088 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatPermute_MPIAIJ" 1094 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1095 { 1096 MPI_Comm comm,pcomm; 1097 PetscInt first,local_size,nrows,*rows; 1098 int ntids; 1099 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1104 /* make a collective version of 'rowp' */ 1105 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1106 if (pcomm==comm) { 1107 crowp = rowp; 1108 } else { 1109 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1110 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1111 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1112 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1113 } 1114 /* collect the global row permutation and invert it */ 1115 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1116 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1117 if (pcomm!=comm) { 1118 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1119 } 1120 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1121 /* get the local target indices */ 1122 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1123 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1124 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1125 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1126 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1127 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1128 /* the column permutation is so much easier; 1129 make a local version of 'colp' and invert it */ 1130 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1131 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1132 if (ntids==1) { 1133 lcolp = colp; 1134 } else { 1135 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1136 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1137 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1138 } 1139 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1140 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1141 if (ntids>1) { 1142 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1143 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1144 } 1145 /* now we just get the submatrix */ 1146 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1147 /* clean up */ 1148 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1149 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1155 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1156 { 1157 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1158 Mat A = mat->A,B = mat->B; 1159 PetscErrorCode ierr; 1160 PetscReal isend[5],irecv[5]; 1161 1162 PetscFunctionBegin; 1163 info->block_size = 1.0; 1164 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1165 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1166 isend[3] = info->memory; isend[4] = info->mallocs; 1167 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1168 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1169 isend[3] += info->memory; isend[4] += info->mallocs; 1170 if (flag == MAT_LOCAL) { 1171 info->nz_used = isend[0]; 1172 info->nz_allocated = isend[1]; 1173 info->nz_unneeded = isend[2]; 1174 info->memory = isend[3]; 1175 info->mallocs = isend[4]; 1176 } else if (flag == MAT_GLOBAL_MAX) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } else if (flag == MAT_GLOBAL_SUM) { 1184 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1185 info->nz_used = irecv[0]; 1186 info->nz_allocated = irecv[1]; 1187 info->nz_unneeded = irecv[2]; 1188 info->memory = irecv[3]; 1189 info->mallocs = irecv[4]; 1190 } 1191 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1192 info->fill_ratio_needed = 0; 1193 info->factor_mallocs = 0; 1194 info->rows_global = (double)matin->M; 1195 info->columns_global = (double)matin->N; 1196 info->rows_local = (double)matin->m; 1197 info->columns_local = (double)matin->N; 1198 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatSetOption_MPIAIJ" 1204 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1205 { 1206 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 switch (op) { 1211 case MAT_NO_NEW_NONZERO_LOCATIONS: 1212 case MAT_YES_NEW_NONZERO_LOCATIONS: 1213 case MAT_COLUMNS_UNSORTED: 1214 case MAT_COLUMNS_SORTED: 1215 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1216 case MAT_KEEP_ZEROED_ROWS: 1217 case MAT_NEW_NONZERO_LOCATION_ERR: 1218 case MAT_USE_INODES: 1219 case MAT_DO_NOT_USE_INODES: 1220 case MAT_IGNORE_ZERO_ENTRIES: 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROW_ORIENTED: 1225 a->roworiented = PETSC_TRUE; 1226 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1227 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1228 break; 1229 case MAT_ROWS_SORTED: 1230 case MAT_ROWS_UNSORTED: 1231 case MAT_YES_NEW_DIAGONALS: 1232 ierr = PetscVerboseInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr); 1233 break; 1234 case MAT_COLUMN_ORIENTED: 1235 a->roworiented = PETSC_FALSE; 1236 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1237 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1238 break; 1239 case MAT_IGNORE_OFF_PROC_ENTRIES: 1240 a->donotstash = PETSC_TRUE; 1241 break; 1242 case MAT_NO_NEW_DIAGONALS: 1243 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1244 case MAT_SYMMETRIC: 1245 case MAT_STRUCTURALLY_SYMMETRIC: 1246 case MAT_HERMITIAN: 1247 case MAT_SYMMETRY_ETERNAL: 1248 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1249 break; 1250 case MAT_NOT_SYMMETRIC: 1251 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1252 case MAT_NOT_HERMITIAN: 1253 case MAT_NOT_SYMMETRY_ETERNAL: 1254 break; 1255 default: 1256 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatGetRow_MPIAIJ" 1263 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1264 { 1265 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1266 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1267 PetscErrorCode ierr; 1268 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1269 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1270 PetscInt *cmap,*idx_p; 1271 1272 PetscFunctionBegin; 1273 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1274 mat->getrowactive = PETSC_TRUE; 1275 1276 if (!mat->rowvalues && (idx || v)) { 1277 /* 1278 allocate enough space to hold information from the longest row. 1279 */ 1280 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1281 PetscInt max = 1,tmp; 1282 for (i=0; i<matin->m; i++) { 1283 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1284 if (max < tmp) { max = tmp; } 1285 } 1286 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1287 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1288 } 1289 1290 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1291 lrow = row - rstart; 1292 1293 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1294 if (!v) {pvA = 0; pvB = 0;} 1295 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1296 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1297 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1298 nztot = nzA + nzB; 1299 1300 cmap = mat->garray; 1301 if (v || idx) { 1302 if (nztot) { 1303 /* Sort by increasing column numbers, assuming A and B already sorted */ 1304 PetscInt imark = -1; 1305 if (v) { 1306 *v = v_p = mat->rowvalues; 1307 for (i=0; i<nzB; i++) { 1308 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1309 else break; 1310 } 1311 imark = i; 1312 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1313 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1314 } 1315 if (idx) { 1316 *idx = idx_p = mat->rowindices; 1317 if (imark > -1) { 1318 for (i=0; i<imark; i++) { 1319 idx_p[i] = cmap[cworkB[i]]; 1320 } 1321 } else { 1322 for (i=0; i<nzB; i++) { 1323 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1324 else break; 1325 } 1326 imark = i; 1327 } 1328 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1329 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1330 } 1331 } else { 1332 if (idx) *idx = 0; 1333 if (v) *v = 0; 1334 } 1335 } 1336 *nz = nztot; 1337 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1338 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1344 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1345 { 1346 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1347 1348 PetscFunctionBegin; 1349 if (!aij->getrowactive) { 1350 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1351 } 1352 aij->getrowactive = PETSC_FALSE; 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatNorm_MPIAIJ" 1358 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1359 { 1360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1361 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1362 PetscErrorCode ierr; 1363 PetscInt i,j,cstart = aij->cstart; 1364 PetscReal sum = 0.0; 1365 PetscScalar *v; 1366 1367 PetscFunctionBegin; 1368 if (aij->size == 1) { 1369 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1370 } else { 1371 if (type == NORM_FROBENIUS) { 1372 v = amat->a; 1373 for (i=0; i<amat->nz; i++) { 1374 #if defined(PETSC_USE_COMPLEX) 1375 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1376 #else 1377 sum += (*v)*(*v); v++; 1378 #endif 1379 } 1380 v = bmat->a; 1381 for (i=0; i<bmat->nz; i++) { 1382 #if defined(PETSC_USE_COMPLEX) 1383 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1384 #else 1385 sum += (*v)*(*v); v++; 1386 #endif 1387 } 1388 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1389 *norm = sqrt(*norm); 1390 } else if (type == NORM_1) { /* max column norm */ 1391 PetscReal *tmp,*tmp2; 1392 PetscInt *jj,*garray = aij->garray; 1393 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1394 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1395 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1396 *norm = 0.0; 1397 v = amat->a; jj = amat->j; 1398 for (j=0; j<amat->nz; j++) { 1399 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1400 } 1401 v = bmat->a; jj = bmat->j; 1402 for (j=0; j<bmat->nz; j++) { 1403 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1404 } 1405 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1406 for (j=0; j<mat->N; j++) { 1407 if (tmp2[j] > *norm) *norm = tmp2[j]; 1408 } 1409 ierr = PetscFree(tmp);CHKERRQ(ierr); 1410 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1411 } else if (type == NORM_INFINITY) { /* max row norm */ 1412 PetscReal ntemp = 0.0; 1413 for (j=0; j<aij->A->m; j++) { 1414 v = amat->a + amat->i[j]; 1415 sum = 0.0; 1416 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1417 sum += PetscAbsScalar(*v); v++; 1418 } 1419 v = bmat->a + bmat->i[j]; 1420 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1421 sum += PetscAbsScalar(*v); v++; 1422 } 1423 if (sum > ntemp) ntemp = sum; 1424 } 1425 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1426 } else { 1427 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1428 } 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatTranspose_MPIAIJ" 1435 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1438 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1439 PetscErrorCode ierr; 1440 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1441 Mat B; 1442 PetscScalar *array; 1443 1444 PetscFunctionBegin; 1445 if (!matout && M != N) { 1446 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1447 } 1448 1449 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1450 ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr); 1451 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1452 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1453 1454 /* copy over the A part */ 1455 Aloc = (Mat_SeqAIJ*)a->A->data; 1456 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1457 row = a->rstart; 1458 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1459 for (i=0; i<m; i++) { 1460 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1461 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1462 } 1463 aj = Aloc->j; 1464 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1465 1466 /* copy over the B part */ 1467 Aloc = (Mat_SeqAIJ*)a->B->data; 1468 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1469 row = a->rstart; 1470 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1471 ct = cols; 1472 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1473 for (i=0; i<m; i++) { 1474 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1475 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1476 } 1477 ierr = PetscFree(ct);CHKERRQ(ierr); 1478 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1479 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1480 if (matout) { 1481 *matout = B; 1482 } else { 1483 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1490 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1491 { 1492 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1493 Mat a = aij->A,b = aij->B; 1494 PetscErrorCode ierr; 1495 PetscInt s1,s2,s3; 1496 1497 PetscFunctionBegin; 1498 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1499 if (rr) { 1500 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1501 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1502 /* Overlap communication with computation. */ 1503 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1504 } 1505 if (ll) { 1506 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1507 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1508 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1509 } 1510 /* scale the diagonal block */ 1511 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1512 1513 if (rr) { 1514 /* Do a scatter end and then right scale the off-diagonal block */ 1515 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1516 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1517 } 1518 1519 PetscFunctionReturn(0); 1520 } 1521 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1525 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1526 { 1527 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 if (!a->rank) { 1532 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1539 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1540 { 1541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1546 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1551 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1552 { 1553 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatEqual_MPIAIJ" 1563 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1564 { 1565 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1566 Mat a,b,c,d; 1567 PetscTruth flg; 1568 PetscErrorCode ierr; 1569 1570 PetscFunctionBegin; 1571 a = matA->A; b = matA->B; 1572 c = matB->A; d = matB->B; 1573 1574 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1575 if (flg) { 1576 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1577 } 1578 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatCopy_MPIAIJ" 1584 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1585 { 1586 PetscErrorCode ierr; 1587 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1588 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1589 1590 PetscFunctionBegin; 1591 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1592 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1593 /* because of the column compression in the off-processor part of the matrix a->B, 1594 the number of columns in a->B and b->B may be different, hence we cannot call 1595 the MatCopy() directly on the two parts. If need be, we can provide a more 1596 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1597 then copying the submatrices */ 1598 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1599 } else { 1600 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1601 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1608 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1609 { 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #include "petscblaslapack.h" 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatAXPY_MPIAIJ" 1620 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1621 { 1622 PetscErrorCode ierr; 1623 PetscInt i; 1624 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1625 PetscBLASInt bnz,one=1; 1626 Mat_SeqAIJ *x,*y; 1627 1628 PetscFunctionBegin; 1629 if (str == SAME_NONZERO_PATTERN) { 1630 PetscScalar alpha = a; 1631 x = (Mat_SeqAIJ *)xx->A->data; 1632 y = (Mat_SeqAIJ *)yy->A->data; 1633 bnz = (PetscBLASInt)x->nz; 1634 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1635 x = (Mat_SeqAIJ *)xx->B->data; 1636 y = (Mat_SeqAIJ *)yy->B->data; 1637 bnz = (PetscBLASInt)x->nz; 1638 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1639 } else if (str == SUBSET_NONZERO_PATTERN) { 1640 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1641 1642 x = (Mat_SeqAIJ *)xx->B->data; 1643 y = (Mat_SeqAIJ *)yy->B->data; 1644 if (y->xtoy && y->XtoY != xx->B) { 1645 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1646 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1647 } 1648 if (!y->xtoy) { /* get xtoy */ 1649 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1650 y->XtoY = xx->B; 1651 } 1652 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1653 } else { 1654 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatConjugate_MPIAIJ" 1663 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1664 { 1665 #if defined(PETSC_USE_COMPLEX) 1666 PetscErrorCode ierr; 1667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1668 1669 PetscFunctionBegin; 1670 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1671 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1672 #else 1673 PetscFunctionBegin; 1674 #endif 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatRealPart_MPIAIJ" 1680 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1681 { 1682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1687 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1693 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1700 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 /* -------------------------------------------------------------------*/ 1705 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1706 MatGetRow_MPIAIJ, 1707 MatRestoreRow_MPIAIJ, 1708 MatMult_MPIAIJ, 1709 /* 4*/ MatMultAdd_MPIAIJ, 1710 MatMultTranspose_MPIAIJ, 1711 MatMultTransposeAdd_MPIAIJ, 1712 0, 1713 0, 1714 0, 1715 /*10*/ 0, 1716 0, 1717 0, 1718 MatRelax_MPIAIJ, 1719 MatTranspose_MPIAIJ, 1720 /*15*/ MatGetInfo_MPIAIJ, 1721 MatEqual_MPIAIJ, 1722 MatGetDiagonal_MPIAIJ, 1723 MatDiagonalScale_MPIAIJ, 1724 MatNorm_MPIAIJ, 1725 /*20*/ MatAssemblyBegin_MPIAIJ, 1726 MatAssemblyEnd_MPIAIJ, 1727 0, 1728 MatSetOption_MPIAIJ, 1729 MatZeroEntries_MPIAIJ, 1730 /*25*/ MatZeroRows_MPIAIJ, 1731 0, 1732 0, 1733 0, 1734 0, 1735 /*30*/ MatSetUpPreallocation_MPIAIJ, 1736 0, 1737 0, 1738 0, 1739 0, 1740 /*35*/ MatDuplicate_MPIAIJ, 1741 0, 1742 0, 1743 0, 1744 0, 1745 /*40*/ MatAXPY_MPIAIJ, 1746 MatGetSubMatrices_MPIAIJ, 1747 MatIncreaseOverlap_MPIAIJ, 1748 MatGetValues_MPIAIJ, 1749 MatCopy_MPIAIJ, 1750 /*45*/ MatPrintHelp_MPIAIJ, 1751 MatScale_MPIAIJ, 1752 0, 1753 0, 1754 0, 1755 /*50*/ MatSetBlockSize_MPIAIJ, 1756 0, 1757 0, 1758 0, 1759 0, 1760 /*55*/ MatFDColoringCreate_MPIAIJ, 1761 0, 1762 MatSetUnfactored_MPIAIJ, 1763 MatPermute_MPIAIJ, 1764 0, 1765 /*60*/ MatGetSubMatrix_MPIAIJ, 1766 MatDestroy_MPIAIJ, 1767 MatView_MPIAIJ, 1768 MatGetPetscMaps_Petsc, 1769 0, 1770 /*65*/ 0, 1771 0, 1772 0, 1773 0, 1774 0, 1775 /*70*/ 0, 1776 0, 1777 MatSetColoring_MPIAIJ, 1778 #if defined(PETSC_HAVE_ADIC) 1779 MatSetValuesAdic_MPIAIJ, 1780 #else 1781 0, 1782 #endif 1783 MatSetValuesAdifor_MPIAIJ, 1784 /*75*/ 0, 1785 0, 1786 0, 1787 0, 1788 0, 1789 /*80*/ 0, 1790 0, 1791 0, 1792 0, 1793 /*84*/ MatLoad_MPIAIJ, 1794 0, 1795 0, 1796 0, 1797 0, 1798 0, 1799 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1800 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1801 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1802 MatPtAP_Basic, 1803 MatPtAPSymbolic_MPIAIJ, 1804 /*95*/ MatPtAPNumeric_MPIAIJ, 1805 0, 1806 0, 1807 0, 1808 0, 1809 /*100*/0, 1810 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1811 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1812 MatConjugate_MPIAIJ, 1813 0, 1814 /*105*/MatSetValuesRow_MPIAIJ, 1815 MatRealPart_MPIAIJ, 1816 MatImaginaryPart_MPIAIJ}; 1817 1818 /* ----------------------------------------------------------------------------------------*/ 1819 1820 EXTERN_C_BEGIN 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1823 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1824 { 1825 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1830 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 EXTERN_C_BEGIN 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1838 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1839 { 1840 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1845 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 EXTERN_C_END 1849 1850 #include "petscpc.h" 1851 EXTERN_C_BEGIN 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1855 { 1856 Mat_MPIAIJ *b; 1857 PetscErrorCode ierr; 1858 PetscInt i; 1859 1860 PetscFunctionBegin; 1861 B->preallocated = PETSC_TRUE; 1862 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1863 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1864 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1865 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1866 if (d_nnz) { 1867 for (i=0; i<B->m; i++) { 1868 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1869 } 1870 } 1871 if (o_nnz) { 1872 for (i=0; i<B->m; i++) { 1873 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1874 } 1875 } 1876 b = (Mat_MPIAIJ*)B->data; 1877 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1878 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1879 1880 PetscFunctionReturn(0); 1881 } 1882 EXTERN_C_END 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1886 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1887 { 1888 Mat mat; 1889 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1890 PetscErrorCode ierr; 1891 1892 PetscFunctionBegin; 1893 *newmat = 0; 1894 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1895 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1896 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1897 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1898 a = (Mat_MPIAIJ*)mat->data; 1899 1900 mat->factor = matin->factor; 1901 mat->bs = matin->bs; 1902 mat->assembled = PETSC_TRUE; 1903 mat->insertmode = NOT_SET_VALUES; 1904 mat->preallocated = PETSC_TRUE; 1905 1906 a->rstart = oldmat->rstart; 1907 a->rend = oldmat->rend; 1908 a->cstart = oldmat->cstart; 1909 a->cend = oldmat->cend; 1910 a->size = oldmat->size; 1911 a->rank = oldmat->rank; 1912 a->donotstash = oldmat->donotstash; 1913 a->roworiented = oldmat->roworiented; 1914 a->rowindices = 0; 1915 a->rowvalues = 0; 1916 a->getrowactive = PETSC_FALSE; 1917 1918 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1919 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1920 if (oldmat->colmap) { 1921 #if defined (PETSC_USE_CTABLE) 1922 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1923 #else 1924 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1925 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1926 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1927 #endif 1928 } else a->colmap = 0; 1929 if (oldmat->garray) { 1930 PetscInt len; 1931 len = oldmat->B->n; 1932 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1933 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1934 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1935 } else a->garray = 0; 1936 1937 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1938 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1939 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1940 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1941 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1942 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1943 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1944 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1945 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1946 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1947 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1948 *newmat = mat; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #include "petscsys.h" 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatLoad_MPIAIJ" 1956 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1957 { 1958 Mat A; 1959 PetscScalar *vals,*svals; 1960 MPI_Comm comm = ((PetscObject)viewer)->comm; 1961 MPI_Status status; 1962 PetscErrorCode ierr; 1963 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1964 PetscInt i,nz,j,rstart,rend,mmax; 1965 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1966 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1967 PetscInt cend,cstart,n,*rowners; 1968 int fd; 1969 1970 PetscFunctionBegin; 1971 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1972 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1973 if (!rank) { 1974 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1975 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1976 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1977 } 1978 1979 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1980 M = header[1]; N = header[2]; 1981 /* determine ownership of all rows */ 1982 m = M/size + ((M % size) > rank); 1983 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1984 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1985 1986 /* First process needs enough room for process with most rows */ 1987 if (!rank) { 1988 mmax = rowners[1]; 1989 for (i=2; i<size; i++) { 1990 mmax = PetscMax(mmax,rowners[i]); 1991 } 1992 } else mmax = m; 1993 1994 rowners[0] = 0; 1995 for (i=2; i<=size; i++) { 1996 mmax = PetscMax(mmax,rowners[i]); 1997 rowners[i] += rowners[i-1]; 1998 } 1999 rstart = rowners[rank]; 2000 rend = rowners[rank+1]; 2001 2002 /* distribute row lengths to all processors */ 2003 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2004 if (!rank) { 2005 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2006 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2007 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2008 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2009 for (j=0; j<m; j++) { 2010 procsnz[0] += ourlens[j]; 2011 } 2012 for (i=1; i<size; i++) { 2013 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2014 /* calculate the number of nonzeros on each processor */ 2015 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2016 procsnz[i] += rowlengths[j]; 2017 } 2018 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2019 } 2020 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2021 } else { 2022 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2023 } 2024 2025 if (!rank) { 2026 /* determine max buffer needed and allocate it */ 2027 maxnz = 0; 2028 for (i=0; i<size; i++) { 2029 maxnz = PetscMax(maxnz,procsnz[i]); 2030 } 2031 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2032 2033 /* read in my part of the matrix column indices */ 2034 nz = procsnz[0]; 2035 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2036 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2037 2038 /* read in every one elses and ship off */ 2039 for (i=1; i<size; i++) { 2040 nz = procsnz[i]; 2041 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2042 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2043 } 2044 ierr = PetscFree(cols);CHKERRQ(ierr); 2045 } else { 2046 /* determine buffer space needed for message */ 2047 nz = 0; 2048 for (i=0; i<m; i++) { 2049 nz += ourlens[i]; 2050 } 2051 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2052 2053 /* receive message of column indices*/ 2054 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2055 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2056 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2057 } 2058 2059 /* determine column ownership if matrix is not square */ 2060 if (N != M) { 2061 n = N/size + ((N % size) > rank); 2062 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2063 cstart = cend - n; 2064 } else { 2065 cstart = rstart; 2066 cend = rend; 2067 n = cend - cstart; 2068 } 2069 2070 /* loop over local rows, determining number of off diagonal entries */ 2071 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2072 jj = 0; 2073 for (i=0; i<m; i++) { 2074 for (j=0; j<ourlens[i]; j++) { 2075 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2076 jj++; 2077 } 2078 } 2079 2080 /* create our matrix */ 2081 for (i=0; i<m; i++) { 2082 ourlens[i] -= offlens[i]; 2083 } 2084 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2085 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2086 ierr = MatSetType(A,type);CHKERRQ(ierr); 2087 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2088 2089 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2090 for (i=0; i<m; i++) { 2091 ourlens[i] += offlens[i]; 2092 } 2093 2094 if (!rank) { 2095 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2096 2097 /* read in my part of the matrix numerical values */ 2098 nz = procsnz[0]; 2099 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 2112 /* read in other processors and ship out */ 2113 for (i=1; i<size; i++) { 2114 nz = procsnz[i]; 2115 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2116 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2117 } 2118 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2119 } else { 2120 /* receive numeric values */ 2121 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2122 2123 /* receive message of values*/ 2124 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2125 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2126 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2127 2128 /* insert into matrix */ 2129 jj = rstart; 2130 smycols = mycols; 2131 svals = vals; 2132 for (i=0; i<m; i++) { 2133 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2134 smycols += ourlens[i]; 2135 svals += ourlens[i]; 2136 jj++; 2137 } 2138 } 2139 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2140 ierr = PetscFree(vals);CHKERRQ(ierr); 2141 ierr = PetscFree(mycols);CHKERRQ(ierr); 2142 ierr = PetscFree(rowners);CHKERRQ(ierr); 2143 2144 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 *newmat = A; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2152 /* 2153 Not great since it makes two copies of the submatrix, first an SeqAIJ 2154 in local and then by concatenating the local matrices the end result. 2155 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2156 */ 2157 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2158 { 2159 PetscErrorCode ierr; 2160 PetscMPIInt rank,size; 2161 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2162 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2163 Mat *local,M,Mreuse; 2164 PetscScalar *vwork,*aa; 2165 MPI_Comm comm = mat->comm; 2166 Mat_SeqAIJ *aij; 2167 2168 2169 PetscFunctionBegin; 2170 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2171 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2172 2173 if (call == MAT_REUSE_MATRIX) { 2174 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2175 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2176 local = &Mreuse; 2177 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2178 } else { 2179 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2180 Mreuse = *local; 2181 ierr = PetscFree(local);CHKERRQ(ierr); 2182 } 2183 2184 /* 2185 m - number of local rows 2186 n - number of columns (same on all processors) 2187 rstart - first row in new global matrix generated 2188 */ 2189 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2190 if (call == MAT_INITIAL_MATRIX) { 2191 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2192 ii = aij->i; 2193 jj = aij->j; 2194 2195 /* 2196 Determine the number of non-zeros in the diagonal and off-diagonal 2197 portions of the matrix in order to do correct preallocation 2198 */ 2199 2200 /* first get start and end of "diagonal" columns */ 2201 if (csize == PETSC_DECIDE) { 2202 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2203 if (mglobal == n) { /* square matrix */ 2204 nlocal = m; 2205 } else { 2206 nlocal = n/size + ((n % size) > rank); 2207 } 2208 } else { 2209 nlocal = csize; 2210 } 2211 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2212 rstart = rend - nlocal; 2213 if (rank == size - 1 && rend != n) { 2214 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2215 } 2216 2217 /* next, compute all the lengths */ 2218 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2219 olens = dlens + m; 2220 for (i=0; i<m; i++) { 2221 jend = ii[i+1] - ii[i]; 2222 olen = 0; 2223 dlen = 0; 2224 for (j=0; j<jend; j++) { 2225 if (*jj < rstart || *jj >= rend) olen++; 2226 else dlen++; 2227 jj++; 2228 } 2229 olens[i] = olen; 2230 dlens[i] = dlen; 2231 } 2232 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2233 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2234 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2235 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2236 ierr = PetscFree(dlens);CHKERRQ(ierr); 2237 } else { 2238 PetscInt ml,nl; 2239 2240 M = *newmat; 2241 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2242 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2243 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2244 /* 2245 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2246 rather than the slower MatSetValues(). 2247 */ 2248 M->was_assembled = PETSC_TRUE; 2249 M->assembled = PETSC_FALSE; 2250 } 2251 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2252 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2253 ii = aij->i; 2254 jj = aij->j; 2255 aa = aij->a; 2256 for (i=0; i<m; i++) { 2257 row = rstart + i; 2258 nz = ii[i+1] - ii[i]; 2259 cwork = jj; jj += nz; 2260 vwork = aa; aa += nz; 2261 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2262 } 2263 2264 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2265 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 *newmat = M; 2267 2268 /* save submatrix used in processor for next request */ 2269 if (call == MAT_INITIAL_MATRIX) { 2270 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2271 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2272 } 2273 2274 PetscFunctionReturn(0); 2275 } 2276 2277 EXTERN_C_BEGIN 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2280 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2281 { 2282 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2283 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2284 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2285 const PetscInt *JJ; 2286 PetscScalar *values; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2291 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2292 o_nnz = d_nnz + m; 2293 2294 for (i=0; i<m; i++) { 2295 nnz = I[i+1]- I[i]; 2296 JJ = J + I[i]; 2297 nnz_max = PetscMax(nnz_max,nnz); 2298 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2299 for (j=0; j<nnz; j++) { 2300 if (*JJ >= cstart) break; 2301 JJ++; 2302 } 2303 d = 0; 2304 for (; j<nnz; j++) { 2305 if (*JJ++ >= cend) break; 2306 d++; 2307 } 2308 d_nnz[i] = d; 2309 o_nnz[i] = nnz - d; 2310 } 2311 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2312 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2313 2314 if (v) values = (PetscScalar*)v; 2315 else { 2316 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2317 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2318 } 2319 2320 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2321 for (i=0; i<m; i++) { 2322 ii = i + rstart; 2323 nnz = I[i+1]- I[i]; 2324 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2325 } 2326 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2327 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2328 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2329 2330 if (!v) { 2331 ierr = PetscFree(values);CHKERRQ(ierr); 2332 } 2333 PetscFunctionReturn(0); 2334 } 2335 EXTERN_C_END 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2339 /*@ 2340 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2341 (the default parallel PETSc format). 2342 2343 Collective on MPI_Comm 2344 2345 Input Parameters: 2346 + B - the matrix 2347 . i - the indices into j for the start of each local row (starts with zero) 2348 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2349 - v - optional values in the matrix 2350 2351 Level: developer 2352 2353 .keywords: matrix, aij, compressed row, sparse, parallel 2354 2355 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2356 @*/ 2357 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2358 { 2359 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2360 2361 PetscFunctionBegin; 2362 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2363 if (f) { 2364 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2371 /*@C 2372 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2373 (the default parallel PETSc format). For good matrix assembly performance 2374 the user should preallocate the matrix storage by setting the parameters 2375 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2376 performance can be increased by more than a factor of 50. 2377 2378 Collective on MPI_Comm 2379 2380 Input Parameters: 2381 + A - the matrix 2382 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2383 (same value is used for all local rows) 2384 . d_nnz - array containing the number of nonzeros in the various rows of the 2385 DIAGONAL portion of the local submatrix (possibly different for each row) 2386 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2387 The size of this array is equal to the number of local rows, i.e 'm'. 2388 You must leave room for the diagonal entry even if it is zero. 2389 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2390 submatrix (same value is used for all local rows). 2391 - o_nnz - array containing the number of nonzeros in the various rows of the 2392 OFF-DIAGONAL portion of the local submatrix (possibly different for 2393 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2394 structure. The size of this array is equal to the number 2395 of local rows, i.e 'm'. 2396 2397 If the *_nnz parameter is given then the *_nz parameter is ignored 2398 2399 The AIJ format (also called the Yale sparse matrix format or 2400 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2401 storage. The stored row and column indices begin with zero. See the users manual for details. 2402 2403 The parallel matrix is partitioned such that the first m0 rows belong to 2404 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2405 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2406 2407 The DIAGONAL portion of the local submatrix of a processor can be defined 2408 as the submatrix which is obtained by extraction the part corresponding 2409 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2410 first row that belongs to the processor, and r2 is the last row belonging 2411 to the this processor. This is a square mxm matrix. The remaining portion 2412 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2413 2414 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2415 2416 Example usage: 2417 2418 Consider the following 8x8 matrix with 34 non-zero values, that is 2419 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2420 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2421 as follows: 2422 2423 .vb 2424 1 2 0 | 0 3 0 | 0 4 2425 Proc0 0 5 6 | 7 0 0 | 8 0 2426 9 0 10 | 11 0 0 | 12 0 2427 ------------------------------------- 2428 13 0 14 | 15 16 17 | 0 0 2429 Proc1 0 18 0 | 19 20 21 | 0 0 2430 0 0 0 | 22 23 0 | 24 0 2431 ------------------------------------- 2432 Proc2 25 26 27 | 0 0 28 | 29 0 2433 30 0 0 | 31 32 33 | 0 34 2434 .ve 2435 2436 This can be represented as a collection of submatrices as: 2437 2438 .vb 2439 A B C 2440 D E F 2441 G H I 2442 .ve 2443 2444 Where the submatrices A,B,C are owned by proc0, D,E,F are 2445 owned by proc1, G,H,I are owned by proc2. 2446 2447 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2448 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2449 The 'M','N' parameters are 8,8, and have the same values on all procs. 2450 2451 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2452 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2453 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2454 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2455 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2456 matrix, ans [DF] as another SeqAIJ matrix. 2457 2458 When d_nz, o_nz parameters are specified, d_nz storage elements are 2459 allocated for every row of the local diagonal submatrix, and o_nz 2460 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2461 One way to choose d_nz and o_nz is to use the max nonzerors per local 2462 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2463 In this case, the values of d_nz,o_nz are: 2464 .vb 2465 proc0 : dnz = 2, o_nz = 2 2466 proc1 : dnz = 3, o_nz = 2 2467 proc2 : dnz = 1, o_nz = 4 2468 .ve 2469 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2470 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2471 for proc3. i.e we are using 12+15+10=37 storage locations to store 2472 34 values. 2473 2474 When d_nnz, o_nnz parameters are specified, the storage is specified 2475 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2476 In the above case the values for d_nnz,o_nnz are: 2477 .vb 2478 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2479 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2480 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2481 .ve 2482 Here the space allocated is sum of all the above values i.e 34, and 2483 hence pre-allocation is perfect. 2484 2485 Level: intermediate 2486 2487 .keywords: matrix, aij, compressed row, sparse, parallel 2488 2489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2490 MPIAIJ 2491 @*/ 2492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2493 { 2494 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2495 2496 PetscFunctionBegin; 2497 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2498 if (f) { 2499 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatCreateMPIAIJ" 2506 /*@C 2507 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2508 (the default parallel PETSc format). For good matrix assembly performance 2509 the user should preallocate the matrix storage by setting the parameters 2510 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2511 performance can be increased by more than a factor of 50. 2512 2513 Collective on MPI_Comm 2514 2515 Input Parameters: 2516 + comm - MPI communicator 2517 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2518 This value should be the same as the local size used in creating the 2519 y vector for the matrix-vector product y = Ax. 2520 . n - This value should be the same as the local size used in creating the 2521 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2522 calculated if N is given) For square matrices n is almost always m. 2523 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2524 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2525 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2526 (same value is used for all local rows) 2527 . d_nnz - array containing the number of nonzeros in the various rows of the 2528 DIAGONAL portion of the local submatrix (possibly different for each row) 2529 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2530 The size of this array is equal to the number of local rows, i.e 'm'. 2531 You must leave room for the diagonal entry even if it is zero. 2532 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2533 submatrix (same value is used for all local rows). 2534 - o_nnz - array containing the number of nonzeros in the various rows of the 2535 OFF-DIAGONAL portion of the local submatrix (possibly different for 2536 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2537 structure. The size of this array is equal to the number 2538 of local rows, i.e 'm'. 2539 2540 Output Parameter: 2541 . A - the matrix 2542 2543 Notes: 2544 If the *_nnz parameter is given then the *_nz parameter is ignored 2545 2546 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2547 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2548 storage requirements for this matrix. 2549 2550 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2551 processor than it must be used on all processors that share the object for 2552 that argument. 2553 2554 The user MUST specify either the local or global matrix dimensions 2555 (possibly both). 2556 2557 The parallel matrix is partitioned such that the first m0 rows belong to 2558 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2559 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2560 2561 The DIAGONAL portion of the local submatrix of a processor can be defined 2562 as the submatrix which is obtained by extraction the part corresponding 2563 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2564 first row that belongs to the processor, and r2 is the last row belonging 2565 to the this processor. This is a square mxm matrix. The remaining portion 2566 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2567 2568 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2569 2570 When calling this routine with a single process communicator, a matrix of 2571 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2572 type of communicator, use the construction mechanism: 2573 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2574 2575 By default, this format uses inodes (identical nodes) when possible. 2576 We search for consecutive rows with the same nonzero structure, thereby 2577 reusing matrix information to achieve increased efficiency. 2578 2579 Options Database Keys: 2580 + -mat_no_inode - Do not use inodes 2581 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2582 - -mat_aij_oneindex - Internally use indexing starting at 1 2583 rather than 0. Note that when calling MatSetValues(), 2584 the user still MUST index entries starting at 0! 2585 2586 2587 Example usage: 2588 2589 Consider the following 8x8 matrix with 34 non-zero values, that is 2590 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2591 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2592 as follows: 2593 2594 .vb 2595 1 2 0 | 0 3 0 | 0 4 2596 Proc0 0 5 6 | 7 0 0 | 8 0 2597 9 0 10 | 11 0 0 | 12 0 2598 ------------------------------------- 2599 13 0 14 | 15 16 17 | 0 0 2600 Proc1 0 18 0 | 19 20 21 | 0 0 2601 0 0 0 | 22 23 0 | 24 0 2602 ------------------------------------- 2603 Proc2 25 26 27 | 0 0 28 | 29 0 2604 30 0 0 | 31 32 33 | 0 34 2605 .ve 2606 2607 This can be represented as a collection of submatrices as: 2608 2609 .vb 2610 A B C 2611 D E F 2612 G H I 2613 .ve 2614 2615 Where the submatrices A,B,C are owned by proc0, D,E,F are 2616 owned by proc1, G,H,I are owned by proc2. 2617 2618 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2619 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2620 The 'M','N' parameters are 8,8, and have the same values on all procs. 2621 2622 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2623 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2624 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2625 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2626 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2627 matrix, ans [DF] as another SeqAIJ matrix. 2628 2629 When d_nz, o_nz parameters are specified, d_nz storage elements are 2630 allocated for every row of the local diagonal submatrix, and o_nz 2631 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2632 One way to choose d_nz and o_nz is to use the max nonzerors per local 2633 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2634 In this case, the values of d_nz,o_nz are: 2635 .vb 2636 proc0 : dnz = 2, o_nz = 2 2637 proc1 : dnz = 3, o_nz = 2 2638 proc2 : dnz = 1, o_nz = 4 2639 .ve 2640 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2641 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2642 for proc3. i.e we are using 12+15+10=37 storage locations to store 2643 34 values. 2644 2645 When d_nnz, o_nnz parameters are specified, the storage is specified 2646 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2647 In the above case the values for d_nnz,o_nnz are: 2648 .vb 2649 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2650 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2651 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2652 .ve 2653 Here the space allocated is sum of all the above values i.e 34, and 2654 hence pre-allocation is perfect. 2655 2656 Level: intermediate 2657 2658 .keywords: matrix, aij, compressed row, sparse, parallel 2659 2660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2661 MPIAIJ 2662 @*/ 2663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2664 { 2665 PetscErrorCode ierr; 2666 PetscMPIInt size; 2667 2668 PetscFunctionBegin; 2669 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2670 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2671 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2672 if (size > 1) { 2673 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2674 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2675 } else { 2676 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2677 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2678 } 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2685 { 2686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2687 2688 PetscFunctionBegin; 2689 *Ad = a->A; 2690 *Ao = a->B; 2691 *colmap = a->garray; 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2697 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2698 { 2699 PetscErrorCode ierr; 2700 PetscInt i; 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2702 2703 PetscFunctionBegin; 2704 if (coloring->ctype == IS_COLORING_LOCAL) { 2705 ISColoringValue *allcolors,*colors; 2706 ISColoring ocoloring; 2707 2708 /* set coloring for diagonal portion */ 2709 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2710 2711 /* set coloring for off-diagonal portion */ 2712 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2713 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2714 for (i=0; i<a->B->n; i++) { 2715 colors[i] = allcolors[a->garray[i]]; 2716 } 2717 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2718 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2719 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2720 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2721 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2722 ISColoringValue *colors; 2723 PetscInt *larray; 2724 ISColoring ocoloring; 2725 2726 /* set coloring for diagonal portion */ 2727 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2728 for (i=0; i<a->A->n; i++) { 2729 larray[i] = i + a->cstart; 2730 } 2731 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2732 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2733 for (i=0; i<a->A->n; i++) { 2734 colors[i] = coloring->colors[larray[i]]; 2735 } 2736 ierr = PetscFree(larray);CHKERRQ(ierr); 2737 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2738 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2739 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2740 2741 /* set coloring for off-diagonal portion */ 2742 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2743 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2744 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2745 for (i=0; i<a->B->n; i++) { 2746 colors[i] = coloring->colors[larray[i]]; 2747 } 2748 ierr = PetscFree(larray);CHKERRQ(ierr); 2749 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2750 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2751 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2752 } else { 2753 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2754 } 2755 2756 PetscFunctionReturn(0); 2757 } 2758 2759 #if defined(PETSC_HAVE_ADIC) 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2762 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2763 { 2764 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2769 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 #endif 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2776 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2777 { 2778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2783 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatMerge" 2789 /*@C 2790 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2791 matrices from each processor 2792 2793 Collective on MPI_Comm 2794 2795 Input Parameters: 2796 + comm - the communicators the parallel matrix will live on 2797 . inmat - the input sequential matrices 2798 . n - number of local columns (or PETSC_DECIDE) 2799 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2800 2801 Output Parameter: 2802 . outmat - the parallel matrix generated 2803 2804 Level: advanced 2805 2806 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2807 2808 @*/ 2809 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2810 { 2811 PetscErrorCode ierr; 2812 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2813 PetscInt *indx; 2814 PetscScalar *values; 2815 PetscMap columnmap,rowmap; 2816 2817 PetscFunctionBegin; 2818 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2819 /* 2820 PetscMPIInt rank; 2821 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2822 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2823 */ 2824 if (scall == MAT_INITIAL_MATRIX){ 2825 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2826 if (n == PETSC_DECIDE){ 2827 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2828 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2829 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2830 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2831 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2832 } 2833 2834 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2835 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2836 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2837 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2838 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2839 2840 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2841 for (i=0;i<m;i++) { 2842 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2843 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2844 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2845 } 2846 /* This routine will ONLY return MPIAIJ type matrix */ 2847 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2848 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2849 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2850 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2851 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2852 2853 } else if (scall == MAT_REUSE_MATRIX){ 2854 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2855 } else { 2856 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2857 } 2858 2859 for (i=0;i<m;i++) { 2860 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2861 I = i + rstart; 2862 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2863 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2864 } 2865 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2866 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2868 2869 PetscFunctionReturn(0); 2870 } 2871 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatFileSplit" 2874 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2875 { 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank; 2878 PetscInt m,N,i,rstart,nnz; 2879 size_t len; 2880 const PetscInt *indx; 2881 PetscViewer out; 2882 char *name; 2883 Mat B; 2884 const PetscScalar *values; 2885 2886 PetscFunctionBegin; 2887 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2888 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2889 /* Should this be the type of the diagonal block of A? */ 2890 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2891 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2892 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2893 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2894 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2895 for (i=0;i<m;i++) { 2896 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2897 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2898 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2899 } 2900 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2901 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 2903 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2904 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2905 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2906 sprintf(name,"%s.%d",outfile,rank); 2907 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2908 ierr = PetscFree(name); 2909 ierr = MatView(B,out);CHKERRQ(ierr); 2910 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2911 ierr = MatDestroy(B);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2916 #undef __FUNCT__ 2917 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2918 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2919 { 2920 PetscErrorCode ierr; 2921 Mat_Merge_SeqsToMPI *merge; 2922 PetscObjectContainer container; 2923 2924 PetscFunctionBegin; 2925 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2926 if (container) { 2927 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2928 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2929 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2930 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2931 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2932 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2933 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2934 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2935 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2936 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2937 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2938 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2939 2940 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2941 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2942 } 2943 ierr = PetscFree(merge);CHKERRQ(ierr); 2944 2945 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #include "src/mat/utils/freespace.h" 2950 #include "petscbt.h" 2951 static PetscEvent logkey_seqstompinum = 0; 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2954 /*@C 2955 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2956 matrices from each processor 2957 2958 Collective on MPI_Comm 2959 2960 Input Parameters: 2961 + comm - the communicators the parallel matrix will live on 2962 . seqmat - the input sequential matrices 2963 . m - number of local rows (or PETSC_DECIDE) 2964 . n - number of local columns (or PETSC_DECIDE) 2965 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2966 2967 Output Parameter: 2968 . mpimat - the parallel matrix generated 2969 2970 Level: advanced 2971 2972 Notes: 2973 The dimensions of the sequential matrix in each processor MUST be the same. 2974 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2975 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2976 @*/ 2977 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2978 { 2979 PetscErrorCode ierr; 2980 MPI_Comm comm=mpimat->comm; 2981 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2982 PetscMPIInt size,rank,taga,*len_s; 2983 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2984 PetscInt proc,m; 2985 PetscInt **buf_ri,**buf_rj; 2986 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2987 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2988 MPI_Request *s_waits,*r_waits; 2989 MPI_Status *status; 2990 MatScalar *aa=a->a,**abuf_r,*ba_i; 2991 Mat_Merge_SeqsToMPI *merge; 2992 PetscObjectContainer container; 2993 2994 PetscFunctionBegin; 2995 if (!logkey_seqstompinum) { 2996 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2997 } 2998 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2999 3000 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3001 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3002 3003 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3004 if (container) { 3005 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3006 } 3007 bi = merge->bi; 3008 bj = merge->bj; 3009 buf_ri = merge->buf_ri; 3010 buf_rj = merge->buf_rj; 3011 3012 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3013 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3014 len_s = merge->len_s; 3015 3016 /* send and recv matrix values */ 3017 /*-----------------------------*/ 3018 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 3019 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3020 3021 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3022 for (proc=0,k=0; proc<size; proc++){ 3023 if (!len_s[proc]) continue; 3024 i = owners[proc]; 3025 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3026 k++; 3027 } 3028 3029 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3030 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3031 ierr = PetscFree(status);CHKERRQ(ierr); 3032 3033 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3034 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3035 3036 /* insert mat values of mpimat */ 3037 /*----------------------------*/ 3038 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3039 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3040 nextrow = buf_ri_k + merge->nrecv; 3041 nextai = nextrow + merge->nrecv; 3042 3043 for (k=0; k<merge->nrecv; k++){ 3044 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3045 nrows = *(buf_ri_k[k]); 3046 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3047 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3048 } 3049 3050 /* set values of ba */ 3051 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 3052 for (i=0; i<m; i++) { 3053 arow = owners[rank] + i; 3054 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3055 bnzi = bi[i+1] - bi[i]; 3056 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3057 3058 /* add local non-zero vals of this proc's seqmat into ba */ 3059 anzi = ai[arow+1] - ai[arow]; 3060 aj = a->j + ai[arow]; 3061 aa = a->a + ai[arow]; 3062 nextaj = 0; 3063 for (j=0; nextaj<anzi; j++){ 3064 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3065 ba_i[j] += aa[nextaj++]; 3066 } 3067 } 3068 3069 /* add received vals into ba */ 3070 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3071 /* i-th row */ 3072 if (i == *nextrow[k]) { 3073 anzi = *(nextai[k]+1) - *nextai[k]; 3074 aj = buf_rj[k] + *(nextai[k]); 3075 aa = abuf_r[k] + *(nextai[k]); 3076 nextaj = 0; 3077 for (j=0; nextaj<anzi; j++){ 3078 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3079 ba_i[j] += aa[nextaj++]; 3080 } 3081 } 3082 nextrow[k]++; nextai[k]++; 3083 } 3084 } 3085 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3086 } 3087 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3088 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3089 3090 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3091 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3092 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3093 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 static PetscEvent logkey_seqstompisym = 0; 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3100 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3101 { 3102 PetscErrorCode ierr; 3103 Mat B_mpi; 3104 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3105 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3106 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3107 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3108 PetscInt len,proc,*dnz,*onz; 3109 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3110 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3111 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3112 MPI_Status *status; 3113 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3114 PetscBT lnkbt; 3115 Mat_Merge_SeqsToMPI *merge; 3116 PetscObjectContainer container; 3117 3118 PetscFunctionBegin; 3119 if (!logkey_seqstompisym) { 3120 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3121 } 3122 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3123 3124 /* make sure it is a PETSc comm */ 3125 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3126 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3127 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3128 3129 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3130 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3131 3132 /* determine row ownership */ 3133 /*---------------------------------------------------------*/ 3134 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3135 if (m == PETSC_DECIDE) { 3136 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3137 } else { 3138 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3139 } 3140 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3141 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3142 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3143 3144 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3145 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3146 3147 /* determine the number of messages to send, their lengths */ 3148 /*---------------------------------------------------------*/ 3149 len_s = merge->len_s; 3150 3151 len = 0; /* length of buf_si[] */ 3152 merge->nsend = 0; 3153 for (proc=0; proc<size; proc++){ 3154 len_si[proc] = 0; 3155 if (proc == rank){ 3156 len_s[proc] = 0; 3157 } else { 3158 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3159 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3160 } 3161 if (len_s[proc]) { 3162 merge->nsend++; 3163 nrows = 0; 3164 for (i=owners[proc]; i<owners[proc+1]; i++){ 3165 if (ai[i+1] > ai[i]) nrows++; 3166 } 3167 len_si[proc] = 2*(nrows+1); 3168 len += len_si[proc]; 3169 } 3170 } 3171 3172 /* determine the number and length of messages to receive for ij-structure */ 3173 /*-------------------------------------------------------------------------*/ 3174 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3175 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3176 3177 /* post the Irecv of j-structure */ 3178 /*-------------------------------*/ 3179 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3180 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3181 3182 /* post the Isend of j-structure */ 3183 /*--------------------------------*/ 3184 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3185 sj_waits = si_waits + merge->nsend; 3186 3187 for (proc=0, k=0; proc<size; proc++){ 3188 if (!len_s[proc]) continue; 3189 i = owners[proc]; 3190 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3191 k++; 3192 } 3193 3194 /* receives and sends of j-structure are complete */ 3195 /*------------------------------------------------*/ 3196 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3197 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3198 3199 /* send and recv i-structure */ 3200 /*---------------------------*/ 3201 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3202 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3203 3204 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3205 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3206 for (proc=0,k=0; proc<size; proc++){ 3207 if (!len_s[proc]) continue; 3208 /* form outgoing message for i-structure: 3209 buf_si[0]: nrows to be sent 3210 [1:nrows]: row index (global) 3211 [nrows+1:2*nrows+1]: i-structure index 3212 */ 3213 /*-------------------------------------------*/ 3214 nrows = len_si[proc]/2 - 1; 3215 buf_si_i = buf_si + nrows+1; 3216 buf_si[0] = nrows; 3217 buf_si_i[0] = 0; 3218 nrows = 0; 3219 for (i=owners[proc]; i<owners[proc+1]; i++){ 3220 anzi = ai[i+1] - ai[i]; 3221 if (anzi) { 3222 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3223 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3224 nrows++; 3225 } 3226 } 3227 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3228 k++; 3229 buf_si += len_si[proc]; 3230 } 3231 3232 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3233 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3234 3235 ierr = PetscVerboseInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3236 for (i=0; i<merge->nrecv; i++){ 3237 ierr = PetscVerboseInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3238 } 3239 3240 ierr = PetscFree(len_si);CHKERRQ(ierr); 3241 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3242 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3243 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3244 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3245 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3246 ierr = PetscFree(status);CHKERRQ(ierr); 3247 3248 /* compute a local seq matrix in each processor */ 3249 /*----------------------------------------------*/ 3250 /* allocate bi array and free space for accumulating nonzero column info */ 3251 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3252 bi[0] = 0; 3253 3254 /* create and initialize a linked list */ 3255 nlnk = N+1; 3256 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3257 3258 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3259 len = 0; 3260 len = ai[owners[rank+1]] - ai[owners[rank]]; 3261 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3262 current_space = free_space; 3263 3264 /* determine symbolic info for each local row */ 3265 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3266 nextrow = buf_ri_k + merge->nrecv; 3267 nextai = nextrow + merge->nrecv; 3268 for (k=0; k<merge->nrecv; k++){ 3269 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3270 nrows = *buf_ri_k[k]; 3271 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3272 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3273 } 3274 3275 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3276 len = 0; 3277 for (i=0;i<m;i++) { 3278 bnzi = 0; 3279 /* add local non-zero cols of this proc's seqmat into lnk */ 3280 arow = owners[rank] + i; 3281 anzi = ai[arow+1] - ai[arow]; 3282 aj = a->j + ai[arow]; 3283 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3284 bnzi += nlnk; 3285 /* add received col data into lnk */ 3286 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3287 if (i == *nextrow[k]) { /* i-th row */ 3288 anzi = *(nextai[k]+1) - *nextai[k]; 3289 aj = buf_rj[k] + *nextai[k]; 3290 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3291 bnzi += nlnk; 3292 nextrow[k]++; nextai[k]++; 3293 } 3294 } 3295 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3296 3297 /* if free space is not available, make more free space */ 3298 if (current_space->local_remaining<bnzi) { 3299 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3300 nspacedouble++; 3301 } 3302 /* copy data into free space, then initialize lnk */ 3303 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3304 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3305 3306 current_space->array += bnzi; 3307 current_space->local_used += bnzi; 3308 current_space->local_remaining -= bnzi; 3309 3310 bi[i+1] = bi[i] + bnzi; 3311 } 3312 3313 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3314 3315 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3316 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3317 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3318 3319 /* create symbolic parallel matrix B_mpi */ 3320 /*---------------------------------------*/ 3321 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3322 if (n==PETSC_DECIDE) { 3323 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3324 } else { 3325 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3326 } 3327 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3328 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3329 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3330 3331 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3332 B_mpi->assembled = PETSC_FALSE; 3333 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3334 merge->bi = bi; 3335 merge->bj = bj; 3336 merge->buf_ri = buf_ri; 3337 merge->buf_rj = buf_rj; 3338 merge->coi = PETSC_NULL; 3339 merge->coj = PETSC_NULL; 3340 merge->owners_co = PETSC_NULL; 3341 3342 /* attach the supporting struct to B_mpi for reuse */ 3343 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3344 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3345 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3346 *mpimat = B_mpi; 3347 3348 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3349 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 3353 static PetscEvent logkey_seqstompi = 0; 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatMerge_SeqsToMPI" 3356 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3357 { 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 if (!logkey_seqstompi) { 3362 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3363 } 3364 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3365 if (scall == MAT_INITIAL_MATRIX){ 3366 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3367 } 3368 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3369 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3370 PetscFunctionReturn(0); 3371 } 3372 static PetscEvent logkey_getlocalmat = 0; 3373 #undef __FUNCT__ 3374 #define __FUNCT__ "MatGetLocalMat" 3375 /*@C 3376 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3377 3378 Not Collective 3379 3380 Input Parameters: 3381 + A - the matrix 3382 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3383 3384 Output Parameter: 3385 . A_loc - the local sequential matrix generated 3386 3387 Level: developer 3388 3389 @*/ 3390 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3391 { 3392 PetscErrorCode ierr; 3393 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3394 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3395 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3396 PetscScalar *aa=a->a,*ba=b->a,*ca; 3397 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3398 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3399 3400 PetscFunctionBegin; 3401 if (!logkey_getlocalmat) { 3402 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3403 } 3404 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3405 if (scall == MAT_INITIAL_MATRIX){ 3406 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3407 ci[0] = 0; 3408 for (i=0; i<am; i++){ 3409 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3410 } 3411 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3412 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3413 k = 0; 3414 for (i=0; i<am; i++) { 3415 ncols_o = bi[i+1] - bi[i]; 3416 ncols_d = ai[i+1] - ai[i]; 3417 /* off-diagonal portion of A */ 3418 for (jo=0; jo<ncols_o; jo++) { 3419 col = cmap[*bj]; 3420 if (col >= cstart) break; 3421 cj[k] = col; bj++; 3422 ca[k++] = *ba++; 3423 } 3424 /* diagonal portion of A */ 3425 for (j=0; j<ncols_d; j++) { 3426 cj[k] = cstart + *aj++; 3427 ca[k++] = *aa++; 3428 } 3429 /* off-diagonal portion of A */ 3430 for (j=jo; j<ncols_o; j++) { 3431 cj[k] = cmap[*bj++]; 3432 ca[k++] = *ba++; 3433 } 3434 } 3435 /* put together the new matrix */ 3436 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3437 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3438 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3439 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3440 mat->freedata = PETSC_TRUE; 3441 mat->nonew = 0; 3442 } else if (scall == MAT_REUSE_MATRIX){ 3443 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3444 ci = mat->i; cj = mat->j; ca = mat->a; 3445 for (i=0; i<am; i++) { 3446 /* off-diagonal portion of A */ 3447 ncols_o = bi[i+1] - bi[i]; 3448 for (jo=0; jo<ncols_o; jo++) { 3449 col = cmap[*bj]; 3450 if (col >= cstart) break; 3451 *ca++ = *ba++; bj++; 3452 } 3453 /* diagonal portion of A */ 3454 ncols_d = ai[i+1] - ai[i]; 3455 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3456 /* off-diagonal portion of A */ 3457 for (j=jo; j<ncols_o; j++) { 3458 *ca++ = *ba++; bj++; 3459 } 3460 } 3461 } else { 3462 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3463 } 3464 3465 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3466 PetscFunctionReturn(0); 3467 } 3468 3469 static PetscEvent logkey_getlocalmatcondensed = 0; 3470 #undef __FUNCT__ 3471 #define __FUNCT__ "MatGetLocalMatCondensed" 3472 /*@C 3473 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3474 3475 Not Collective 3476 3477 Input Parameters: 3478 + A - the matrix 3479 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3480 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3481 3482 Output Parameter: 3483 . A_loc - the local sequential matrix generated 3484 3485 Level: developer 3486 3487 @*/ 3488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3489 { 3490 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3491 PetscErrorCode ierr; 3492 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3493 IS isrowa,iscola; 3494 Mat *aloc; 3495 3496 PetscFunctionBegin; 3497 if (!logkey_getlocalmatcondensed) { 3498 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3499 } 3500 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3501 if (!row){ 3502 start = a->rstart; end = a->rend; 3503 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3504 } else { 3505 isrowa = *row; 3506 } 3507 if (!col){ 3508 start = a->cstart; 3509 cmap = a->garray; 3510 nzA = a->A->n; 3511 nzB = a->B->n; 3512 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3513 ncols = 0; 3514 for (i=0; i<nzB; i++) { 3515 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3516 else break; 3517 } 3518 imark = i; 3519 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3520 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3521 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3522 ierr = PetscFree(idx);CHKERRQ(ierr); 3523 } else { 3524 iscola = *col; 3525 } 3526 if (scall != MAT_INITIAL_MATRIX){ 3527 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3528 aloc[0] = *A_loc; 3529 } 3530 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3531 *A_loc = aloc[0]; 3532 ierr = PetscFree(aloc);CHKERRQ(ierr); 3533 if (!row){ 3534 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3535 } 3536 if (!col){ 3537 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3538 } 3539 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3540 PetscFunctionReturn(0); 3541 } 3542 3543 static PetscEvent logkey_GetBrowsOfAcols = 0; 3544 #undef __FUNCT__ 3545 #define __FUNCT__ "MatGetBrowsOfAcols" 3546 /*@C 3547 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3548 3549 Collective on Mat 3550 3551 Input Parameters: 3552 + A,B - the matrices in mpiaij format 3553 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3554 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3555 3556 Output Parameter: 3557 + rowb, colb - index sets of rows and columns of B to extract 3558 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3559 - B_seq - the sequential matrix generated 3560 3561 Level: developer 3562 3563 @*/ 3564 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3565 { 3566 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3567 PetscErrorCode ierr; 3568 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3569 IS isrowb,iscolb; 3570 Mat *bseq; 3571 3572 PetscFunctionBegin; 3573 if (a->cstart != b->rstart || a->cend != b->rend){ 3574 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3575 } 3576 if (!logkey_GetBrowsOfAcols) { 3577 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3578 } 3579 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3580 3581 if (scall == MAT_INITIAL_MATRIX){ 3582 start = a->cstart; 3583 cmap = a->garray; 3584 nzA = a->A->n; 3585 nzB = a->B->n; 3586 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3587 ncols = 0; 3588 for (i=0; i<nzB; i++) { /* row < local row index */ 3589 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3590 else break; 3591 } 3592 imark = i; 3593 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3594 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3595 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3596 ierr = PetscFree(idx);CHKERRQ(ierr); 3597 *brstart = imark; 3598 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3599 } else { 3600 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3601 isrowb = *rowb; iscolb = *colb; 3602 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3603 bseq[0] = *B_seq; 3604 } 3605 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3606 *B_seq = bseq[0]; 3607 ierr = PetscFree(bseq);CHKERRQ(ierr); 3608 if (!rowb){ 3609 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3610 } else { 3611 *rowb = isrowb; 3612 } 3613 if (!colb){ 3614 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3615 } else { 3616 *colb = iscolb; 3617 } 3618 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3619 PetscFunctionReturn(0); 3620 } 3621 3622 static PetscEvent logkey_GetBrowsOfAocols = 0; 3623 #undef __FUNCT__ 3624 #define __FUNCT__ "MatGetBrowsOfAoCols" 3625 /*@C 3626 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3627 of the OFF-DIAGONAL portion of local A 3628 3629 Collective on Mat 3630 3631 Input Parameters: 3632 + A,B - the matrices in mpiaij format 3633 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3634 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3635 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3636 3637 Output Parameter: 3638 + B_oth - the sequential matrix generated 3639 3640 Level: developer 3641 3642 @*/ 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3644 { 3645 VecScatter_MPI_General *gen_to,*gen_from; 3646 PetscErrorCode ierr; 3647 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3648 Mat_SeqAIJ *b_oth; 3649 VecScatter ctx=a->Mvctx; 3650 MPI_Comm comm=ctx->comm; 3651 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3652 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3653 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3654 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3655 MPI_Request *rwaits,*swaits; 3656 MPI_Status *sstatus,rstatus; 3657 PetscInt *cols; 3658 PetscScalar *vals; 3659 PetscMPIInt j; 3660 3661 PetscFunctionBegin; 3662 if (a->cstart != b->rstart || a->cend != b->rend){ 3663 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3664 } 3665 if (!logkey_GetBrowsOfAocols) { 3666 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3667 } 3668 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3669 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3670 3671 gen_to = (VecScatter_MPI_General*)ctx->todata; 3672 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3673 rvalues = gen_from->values; /* holds the length of sending row */ 3674 svalues = gen_to->values; /* holds the length of receiving row */ 3675 nrecvs = gen_from->n; 3676 nsends = gen_to->n; 3677 rwaits = gen_from->requests; 3678 swaits = gen_to->requests; 3679 srow = gen_to->indices; /* local row index to be sent */ 3680 rstarts = gen_from->starts; 3681 sstarts = gen_to->starts; 3682 rprocs = gen_from->procs; 3683 sprocs = gen_to->procs; 3684 sstatus = gen_to->sstatus; 3685 3686 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3687 if (scall == MAT_INITIAL_MATRIX){ 3688 /* i-array */ 3689 /*---------*/ 3690 /* post receives */ 3691 for (i=0; i<nrecvs; i++){ 3692 rowlen = (PetscInt*)rvalues + rstarts[i]; 3693 nrows = rstarts[i+1]-rstarts[i]; 3694 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3695 } 3696 3697 /* pack the outgoing message */ 3698 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3699 rstartsj = sstartsj + nsends +1; 3700 sstartsj[0] = 0; rstartsj[0] = 0; 3701 len = 0; /* total length of j or a array to be sent */ 3702 k = 0; 3703 for (i=0; i<nsends; i++){ 3704 rowlen = (PetscInt*)svalues + sstarts[i]; 3705 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3706 for (j=0; j<nrows; j++) { 3707 row = srow[k] + b->rowners[rank]; /* global row idx */ 3708 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3709 len += rowlen[j]; 3710 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3711 k++; 3712 } 3713 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3714 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3715 } 3716 /* recvs and sends of i-array are completed */ 3717 i = nrecvs; 3718 while (i--) { 3719 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3720 } 3721 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3722 /* allocate buffers for sending j and a arrays */ 3723 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3724 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3725 3726 /* create i-array of B_oth */ 3727 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3728 b_othi[0] = 0; 3729 len = 0; /* total length of j or a array to be received */ 3730 k = 0; 3731 for (i=0; i<nrecvs; i++){ 3732 rowlen = (PetscInt*)rvalues + rstarts[i]; 3733 nrows = rstarts[i+1]-rstarts[i]; 3734 for (j=0; j<nrows; j++) { 3735 b_othi[k+1] = b_othi[k] + rowlen[j]; 3736 len += rowlen[j]; k++; 3737 } 3738 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3739 } 3740 3741 /* allocate space for j and a arrrays of B_oth */ 3742 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3743 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3744 3745 /* j-array */ 3746 /*---------*/ 3747 /* post receives of j-array */ 3748 for (i=0; i<nrecvs; i++){ 3749 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3750 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3751 } 3752 k = 0; 3753 for (i=0; i<nsends; i++){ 3754 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3755 bufJ = bufj+sstartsj[i]; 3756 for (j=0; j<nrows; j++) { 3757 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3758 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3759 for (l=0; l<ncols; l++){ 3760 *bufJ++ = cols[l]; 3761 } 3762 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3763 } 3764 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3765 } 3766 3767 /* recvs and sends of j-array are completed */ 3768 i = nrecvs; 3769 while (i--) { 3770 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3771 } 3772 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3773 } else if (scall == MAT_REUSE_MATRIX){ 3774 sstartsj = *startsj; 3775 rstartsj = sstartsj + nsends +1; 3776 bufa = *bufa_ptr; 3777 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3778 b_otha = b_oth->a; 3779 } else { 3780 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3781 } 3782 3783 /* a-array */ 3784 /*---------*/ 3785 /* post receives of a-array */ 3786 for (i=0; i<nrecvs; i++){ 3787 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3788 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3789 } 3790 k = 0; 3791 for (i=0; i<nsends; i++){ 3792 nrows = sstarts[i+1]-sstarts[i]; 3793 bufA = bufa+sstartsj[i]; 3794 for (j=0; j<nrows; j++) { 3795 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3796 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3797 for (l=0; l<ncols; l++){ 3798 *bufA++ = vals[l]; 3799 } 3800 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3801 3802 } 3803 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3804 } 3805 /* recvs and sends of a-array are completed */ 3806 i = nrecvs; 3807 while (i--) { 3808 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3809 } 3810 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3811 3812 if (scall == MAT_INITIAL_MATRIX){ 3813 /* put together the new matrix */ 3814 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3815 3816 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3817 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3818 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3819 b_oth->freedata = PETSC_TRUE; 3820 b_oth->nonew = 0; 3821 3822 ierr = PetscFree(bufj);CHKERRQ(ierr); 3823 if (!startsj || !bufa_ptr){ 3824 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3825 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3826 } else { 3827 *startsj = sstartsj; 3828 *bufa_ptr = bufa; 3829 } 3830 } 3831 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3832 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "MatGetCommunicationStructs" 3838 /*@C 3839 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3840 3841 Not Collective 3842 3843 Input Parameters: 3844 . A - The matrix in mpiaij format 3845 3846 Output Parameter: 3847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3848 . colmap - A map from global column index to local index into lvec 3849 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3850 3851 Level: developer 3852 3853 @*/ 3854 #if defined (PETSC_USE_CTABLE) 3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3856 #else 3857 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3858 #endif 3859 { 3860 Mat_MPIAIJ *a; 3861 3862 PetscFunctionBegin; 3863 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3864 PetscValidPointer(lvec, 2) 3865 PetscValidPointer(colmap, 3) 3866 PetscValidPointer(multScatter, 4) 3867 a = (Mat_MPIAIJ *) A->data; 3868 if (lvec) *lvec = a->lvec; 3869 if (colmap) *colmap = a->colmap; 3870 if (multScatter) *multScatter = a->Mvctx; 3871 PetscFunctionReturn(0); 3872 } 3873 3874 /*MC 3875 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3876 3877 Options Database Keys: 3878 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3879 3880 Level: beginner 3881 3882 .seealso: MatCreateMPIAIJ 3883 M*/ 3884 3885 EXTERN_C_BEGIN 3886 #undef __FUNCT__ 3887 #define __FUNCT__ "MatCreate_MPIAIJ" 3888 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3889 { 3890 Mat_MPIAIJ *b; 3891 PetscErrorCode ierr; 3892 PetscInt i; 3893 PetscMPIInt size; 3894 3895 PetscFunctionBegin; 3896 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3897 3898 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3899 B->data = (void*)b; 3900 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3901 B->factor = 0; 3902 B->bs = 1; 3903 B->assembled = PETSC_FALSE; 3904 B->mapping = 0; 3905 3906 B->insertmode = NOT_SET_VALUES; 3907 b->size = size; 3908 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3909 3910 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3911 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3912 3913 /* the information in the maps duplicates the information computed below, eventually 3914 we should remove the duplicate information that is not contained in the maps */ 3915 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3916 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3917 3918 /* build local table of row and column ownerships */ 3919 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3920 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3921 b->cowners = b->rowners + b->size + 2; 3922 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3923 b->rowners[0] = 0; 3924 for (i=2; i<=b->size; i++) { 3925 b->rowners[i] += b->rowners[i-1]; 3926 } 3927 b->rstart = b->rowners[b->rank]; 3928 b->rend = b->rowners[b->rank+1]; 3929 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3930 b->cowners[0] = 0; 3931 for (i=2; i<=b->size; i++) { 3932 b->cowners[i] += b->cowners[i-1]; 3933 } 3934 b->cstart = b->cowners[b->rank]; 3935 b->cend = b->cowners[b->rank+1]; 3936 3937 /* build cache for off array entries formed */ 3938 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3939 b->donotstash = PETSC_FALSE; 3940 b->colmap = 0; 3941 b->garray = 0; 3942 b->roworiented = PETSC_TRUE; 3943 3944 /* stuff used for matrix vector multiply */ 3945 b->lvec = PETSC_NULL; 3946 b->Mvctx = PETSC_NULL; 3947 3948 /* stuff for MatGetRow() */ 3949 b->rowindices = 0; 3950 b->rowvalues = 0; 3951 b->getrowactive = PETSC_FALSE; 3952 3953 /* Explicitly create 2 MATSEQAIJ matrices. */ 3954 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3955 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 3956 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3957 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3958 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3959 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 3960 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3961 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3962 3963 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3964 "MatStoreValues_MPIAIJ", 3965 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3966 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3967 "MatRetrieveValues_MPIAIJ", 3968 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3969 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3970 "MatGetDiagonalBlock_MPIAIJ", 3971 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3972 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3973 "MatIsTranspose_MPIAIJ", 3974 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3975 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3976 "MatMPIAIJSetPreallocation_MPIAIJ", 3977 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3978 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3979 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3980 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3981 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3982 "MatDiagonalScaleLocal_MPIAIJ", 3983 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3984 PetscFunctionReturn(0); 3985 } 3986 EXTERN_C_END 3987 3988 /* 3989 Special version for direct calls from Fortran 3990 */ 3991 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3992 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 3993 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3994 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 3995 #endif 3996 3997 /* Change these macros so can be used in void function */ 3998 #undef CHKERRQ 3999 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4000 #undef SETERRQ2 4001 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4002 #undef SETERRQ 4003 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4004 4005 EXTERN_C_BEGIN 4006 #undef __FUNCT__ 4007 #define __FUNCT__ "matsetvaluesmpiaij_" 4008 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 4009 { 4010 Mat mat = *mmat; 4011 PetscInt m = *mm, n = *mn; 4012 InsertMode addv = *maddv; 4013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4014 PetscScalar value; 4015 PetscErrorCode ierr; 4016 MatPreallocated(mat); 4017 if (mat->insertmode == NOT_SET_VALUES) { 4018 mat->insertmode = addv; 4019 } 4020 #if defined(PETSC_USE_DEBUG) 4021 else if (mat->insertmode != addv) { 4022 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4023 } 4024 #endif 4025 { 4026 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 4027 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 4028 PetscTruth roworiented = aij->roworiented; 4029 4030 /* Some Variables required in the macro */ 4031 Mat A = aij->A; 4032 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4033 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4034 PetscScalar *aa = a->a; 4035 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4036 Mat B = aij->B; 4037 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4038 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 4039 PetscScalar *ba = b->a; 4040 4041 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4042 PetscInt nonew = a->nonew; 4043 PetscScalar *ap1,*ap2; 4044 4045 PetscFunctionBegin; 4046 for (i=0; i<m; i++) { 4047 if (im[i] < 0) continue; 4048 #if defined(PETSC_USE_DEBUG) 4049 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 4050 #endif 4051 if (im[i] >= rstart && im[i] < rend) { 4052 row = im[i] - rstart; 4053 lastcol1 = -1; 4054 rp1 = aj + ai[row]; 4055 ap1 = aa + ai[row]; 4056 rmax1 = aimax[row]; 4057 nrow1 = ailen[row]; 4058 low1 = 0; 4059 high1 = nrow1; 4060 lastcol2 = -1; 4061 rp2 = bj + bi[row]; 4062 ap2 = ba + bi[row]; 4063 rmax2 = bimax[row]; 4064 nrow2 = bilen[row]; 4065 low2 = 0; 4066 high2 = nrow2; 4067 4068 for (j=0; j<n; j++) { 4069 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4070 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4071 if (in[j] >= cstart && in[j] < cend){ 4072 col = in[j] - cstart; 4073 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4074 } else if (in[j] < 0) continue; 4075 #if defined(PETSC_USE_DEBUG) 4076 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 4077 #endif 4078 else { 4079 if (mat->was_assembled) { 4080 if (!aij->colmap) { 4081 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4082 } 4083 #if defined (PETSC_USE_CTABLE) 4084 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4085 col--; 4086 #else 4087 col = aij->colmap[in[j]] - 1; 4088 #endif 4089 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4090 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4091 col = in[j]; 4092 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4093 B = aij->B; 4094 b = (Mat_SeqAIJ*)B->data; 4095 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4096 rp2 = bj + bi[row]; 4097 ap2 = ba + bi[row]; 4098 rmax2 = bimax[row]; 4099 nrow2 = bilen[row]; 4100 low2 = 0; 4101 high2 = nrow2; 4102 bm = aij->B->m; 4103 ba = b->a; 4104 } 4105 } else col = in[j]; 4106 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4107 } 4108 } 4109 } else { 4110 if (!aij->donotstash) { 4111 if (roworiented) { 4112 if (ignorezeroentries && v[i*n] == 0.0) continue; 4113 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4114 } else { 4115 if (ignorezeroentries && v[i] == 0.0) continue; 4116 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4117 } 4118 } 4119 } 4120 }} 4121 PetscFunctionReturnVoid(); 4122 } 4123 EXTERN_C_END 4124