#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
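/*
   Both macros above implement the same pattern: a bounded binary search over the
   sorted column indices of one CSR row, then either an in-place update or a
   shift-and-insert of a new nonzero. A minimal standalone sketch of that pattern
   (hypothetical names, plain C, assumes the row has spare capacity and that
   <string.h> is available for memmove):

     static void row_insert(int *cols,double *vals,int *len,int col,double v,int add)
     {
       int lo = 0,hi = *len,i;
       while (hi - lo > 5) {                 // coarse binary search
         int t = (lo + hi)/2;
         if (cols[t] > col) hi = t; else lo = t;
       }
       for (i=lo; i<hi; i++) {               // finish with a linear scan
         if (cols[i] > col) break;
         if (cols[i] == col) { if (add) vals[i] += v; else vals[i] = v; return; }
       }
       memmove(cols+i+1,cols+i,(*len-i)*sizeof(int));      // shift tail up
       memmove(vals+i+1,vals+i,(*len-i)*sizeof(double));
       cols[i] = col; vals[i] = v; (*len)++;
     }

   The real macros additionally honor the nonew and ignorezeroentries options and
   call MatSeqXAIJReallocateAIJ() when the preallocated row is full.
*/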
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
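/*
   The three PetscMemcpy() calls above assume the caller supplies the values of one
   row in global column order: the off-diagonal entries left of the owned block,
   then the diagonal-block entries, then the off-diagonal entries to the right.
   Schematically (l as computed by the loop above):

       v[] :  | B-left (l entries) | A (a->i[row+1]-a->i[row] entries) | B-right |

   so B's row receives the first l values and the tail, while A receives the middle.
*/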
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;   /* refresh ba before rebuilding ap2; B's arrays were reallocated by the disassembly */
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
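/*
   Typical call pattern (a sketch, not tied to this file): each process may set any
   global entry; rows it does not own are stashed above and shipped at assembly time.

     PetscInt    row = 0, cols[2] = {0,1};        /- hypothetical indices -/
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/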
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
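/*
   MatAssemblyBegin/End form a split-phase protocol. Begin(), above, first checks
   that all ranks agree on the InsertMode: the MPI_BOR reduction turns a mix of
   ADD_VALUES and INSERT_VALUES into ADD_VALUES|INSERT_VALUES, which is rejected.
   It then starts scattering the stashed off-process entries; End(), below, drains
   the incoming stash messages row by row, so independent user work can overlap
   with the communication between the two calls.
*/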
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
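/*
   The next routine supports zeroing rows owned by other processes: each rank first
   routes the global row indices in rows[] to their owners (the sends and receives
   below), then zeroes its own rows locally. A sketch of a typical call (hypothetical
   indices; diag = 1.0 keeps the zeroed rows nonsingular):

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/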
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry
     (which would allocate new memory, triggering many mallocs and
     slowing things down).

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
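/*
   Communication pattern used above: nprocs[2*j] counts the entries bound for rank j
   and nprocs[2*j+1] flags whether anything is sent there at all; PetscMaxSum() then
   tells every rank how many messages to expect (nrecvs) and the largest message
   length (nmax), so the receives can be posted with MPI_ANY_SOURCE before any send
   is started. The same rendezvous idiom appears elsewhere in PETSc's MPI matrices.
*/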
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we still need to call this no-op */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M, N, first, last, *notme, i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr); /* ISCreateGeneral() copied the indices, so the work array can be freed */

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
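/*
   All of the products above exploit the storage split of this format: with x_own
   the locally owned entries of x and x_ghost the off-process entries gathered into
   a->lvec, the forward product is

       y = (a->A)*x_own + (a->B)*x_ghost

   The ghost scatter is started first, the diagonal-block product overlaps the
   communication, and the off-diagonal contribution is added after VecScatterEnd().
   The transpose products reverse the flow: (a->B)^T*x is computed locally and then
   scattered back (SCATTER_REVERSE with ADD_VALUES) into the owners' results.
*/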
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
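/*
   On-disk layout produced above (all bytes written by rank 0, in PETSc's binary
   matrix format):

     header[4]       = {MAT_FILE_COOKIE, M, N, global nz}
     row_lengths[M]  = nonzeros per row, in global row order
     col_indices[nz] = global column indices, row by row, ascending within a row
     values[nz]      = the matching numerical values

   The per-row three-way merge (B entries left of the owned block, then A, then the
   remaining B entries) is what keeps the column indices sorted within each row.
*/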
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }
  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
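/*
   The "local" SOR variants above amount to block Jacobi in parallel: each outer
   iteration refreshes the ghost values of xx, forms bb1 = bb - B*x_ghost (the
   VecScale by -1.0 followed by the multadd), and then runs the requested sweep of
   the sequential SOR kernel on the diagonal block only. A truly coupled parallel
   SOR is refused with PETSC_ERR_SUP.
*/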
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
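/*
   MatGetRow, below, must present one globally indexed, column-sorted row even
   though the entries live in two matrices. Since A's columns (shifted by cstart)
   and B's columns (mapped through garray) are each already sorted, the row is
   assembled as: B entries below cstart, then all A entries, then the remaining
   B entries -- the same three-way split used throughout this file.
*/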
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
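/*
   Norms computed below, with the local contributions combined by one Allreduce:

     NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )    (MPI_SUM of the local sums of squares)
     NORM_1:          max_j sum_i |a_ij|         (MPI_SUM per column, then take the max)
     NORM_INFINITY:   max_i sum_j |a_ij|         (local row sums, then MPI_MAX)
*/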
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = A->rmap.rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
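/*
   Note the collective reduction in MatEqual above: the local comparisons may give
   different answers on different ranks, so the final result is the MPI_LAND of all
   local flags and every rank receives the same PetscTruth value.
*/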
*/ 1587 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1588 /* because of the column compression in the off-processor part of the matrix a->B, 1589 the number of columns in a->B and b->B may be different, hence we cannot call 1590 the MatCopy() directly on the two parts. If need be, we can provide a more 1591 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1592 then copying the submatrices */ 1593 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1594 } else { 1595 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1596 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1603 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1604 { 1605 PetscErrorCode ierr; 1606 1607 PetscFunctionBegin; 1608 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1609 PetscFunctionReturn(0); 1610 } 1611 1612 #include "petscblaslapack.h" 1613 #undef __FUNCT__ 1614 #define __FUNCT__ "MatAXPY_MPIAIJ" 1615 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1616 { 1617 PetscErrorCode ierr; 1618 PetscInt i; 1619 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1620 PetscBLASInt bnz,one=1; 1621 Mat_SeqAIJ *x,*y; 1622 1623 PetscFunctionBegin; 1624 if (str == SAME_NONZERO_PATTERN) { 1625 PetscScalar alpha = a; 1626 x = (Mat_SeqAIJ *)xx->A->data; 1627 y = (Mat_SeqAIJ *)yy->A->data; 1628 bnz = (PetscBLASInt)x->nz; 1629 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1630 x = (Mat_SeqAIJ *)xx->B->data; 1631 y = (Mat_SeqAIJ *)yy->B->data; 1632 bnz = (PetscBLASInt)x->nz; 1633 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1634 } else if (str == SUBSET_NONZERO_PATTERN) { 1635 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1636 1637 x = (Mat_SeqAIJ *)xx->B->data; 1638 y = (Mat_SeqAIJ *)yy->B->data; 1639 if (y->xtoy && y->XtoY != xx->B) { 1640 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1641 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1642 } 1643 if (!y->xtoy) { /* get xtoy */ 1644 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1645 y->XtoY = xx->B; 1646 } 1647 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1648 } else { 1649 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1650 } 1651 PetscFunctionReturn(0); 1652 } 1653 1654 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatConjugate_MPIAIJ" 1658 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1659 { 1660 #if defined(PETSC_USE_COMPLEX) 1661 PetscErrorCode ierr; 1662 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1663 1664 PetscFunctionBegin; 1665 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1666 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1667 #else 1668 PetscFunctionBegin; 1669 #endif 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatRealPart_MPIAIJ" 1675 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1676 { 1677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1682 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1688 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1689 { 1690 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 1693 
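  /* keep only the imaginary part of both the diagonal (a->A) and the
     off-diagonal (a->B) blocks; the nonzero pattern is left unchanged */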
PetscFunctionBegin; 1694 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1695 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 1699 /* -------------------------------------------------------------------*/ 1700 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1701 MatGetRow_MPIAIJ, 1702 MatRestoreRow_MPIAIJ, 1703 MatMult_MPIAIJ, 1704 /* 4*/ MatMultAdd_MPIAIJ, 1705 MatMultTranspose_MPIAIJ, 1706 MatMultTransposeAdd_MPIAIJ, 1707 0, 1708 0, 1709 0, 1710 /*10*/ 0, 1711 0, 1712 0, 1713 MatRelax_MPIAIJ, 1714 MatTranspose_MPIAIJ, 1715 /*15*/ MatGetInfo_MPIAIJ, 1716 MatEqual_MPIAIJ, 1717 MatGetDiagonal_MPIAIJ, 1718 MatDiagonalScale_MPIAIJ, 1719 MatNorm_MPIAIJ, 1720 /*20*/ MatAssemblyBegin_MPIAIJ, 1721 MatAssemblyEnd_MPIAIJ, 1722 0, 1723 MatSetOption_MPIAIJ, 1724 MatZeroEntries_MPIAIJ, 1725 /*25*/ MatZeroRows_MPIAIJ, 1726 0, 1727 0, 1728 0, 1729 0, 1730 /*30*/ MatSetUpPreallocation_MPIAIJ, 1731 0, 1732 0, 1733 0, 1734 0, 1735 /*35*/ MatDuplicate_MPIAIJ, 1736 0, 1737 0, 1738 0, 1739 0, 1740 /*40*/ MatAXPY_MPIAIJ, 1741 MatGetSubMatrices_MPIAIJ, 1742 MatIncreaseOverlap_MPIAIJ, 1743 MatGetValues_MPIAIJ, 1744 MatCopy_MPIAIJ, 1745 /*45*/ MatPrintHelp_MPIAIJ, 1746 MatScale_MPIAIJ, 1747 0, 1748 0, 1749 0, 1750 /*50*/ MatSetBlockSize_MPIAIJ, 1751 0, 1752 0, 1753 0, 1754 0, 1755 /*55*/ MatFDColoringCreate_MPIAIJ, 1756 0, 1757 MatSetUnfactored_MPIAIJ, 1758 MatPermute_MPIAIJ, 1759 0, 1760 /*60*/ MatGetSubMatrix_MPIAIJ, 1761 MatDestroy_MPIAIJ, 1762 MatView_MPIAIJ, 1763 0, 1764 0, 1765 /*65*/ 0, 1766 0, 1767 0, 1768 0, 1769 0, 1770 /*70*/ 0, 1771 0, 1772 MatSetColoring_MPIAIJ, 1773 #if defined(PETSC_HAVE_ADIC) 1774 MatSetValuesAdic_MPIAIJ, 1775 #else 1776 0, 1777 #endif 1778 MatSetValuesAdifor_MPIAIJ, 1779 /*75*/ 0, 1780 0, 1781 0, 1782 0, 1783 0, 1784 /*80*/ 0, 1785 0, 1786 0, 1787 0, 1788 /*84*/ MatLoad_MPIAIJ, 1789 0, 1790 0, 1791 0, 1792 0, 1793 0, 1794 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1795 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1796 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1797 MatPtAP_Basic, 1798 MatPtAPSymbolic_MPIAIJ, 1799 /*95*/ MatPtAPNumeric_MPIAIJ, 1800 0, 1801 0, 1802 0, 1803 0, 1804 /*100*/0, 1805 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1806 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1807 MatConjugate_MPIAIJ, 1808 0, 1809 /*105*/MatSetValuesRow_MPIAIJ, 1810 MatRealPart_MPIAIJ, 1811 MatImaginaryPart_MPIAIJ}; 1812 1813 /* ----------------------------------------------------------------------------------------*/ 1814 1815 EXTERN_C_BEGIN 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1819 { 1820 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1825 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 EXTERN_C_END 1829 1830 EXTERN_C_BEGIN 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1833 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1834 { 1835 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1836 PetscErrorCode ierr; 1837 1838 PetscFunctionBegin; 1839 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1840 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 EXTERN_C_END 1844 1845 #include "petscpc.h" 1846 EXTERN_C_BEGIN 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1849 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const 
PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1850 { 1851 Mat_MPIAIJ *b; 1852 PetscErrorCode ierr; 1853 PetscInt i; 1854 1855 PetscFunctionBegin; 1856 B->preallocated = PETSC_TRUE; 1857 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1858 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1859 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1860 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1861 1862 B->rmap.bs = B->cmap.bs = 1; 1863 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1864 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1865 if (d_nnz) { 1866 for (i=0; i<B->rmap.n; i++) { 1867 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1868 } 1869 } 1870 if (o_nnz) { 1871 for (i=0; i<B->rmap.n; i++) { 1872 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1873 } 1874 } 1875 b = (Mat_MPIAIJ*)B->data; 1876 1877 /* Explicitly create 2 MATSEQAIJ matrices. */ 1878 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1879 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1880 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1881 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1882 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1883 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1884 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1885 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1886 1887 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1888 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1889 1890 PetscFunctionReturn(0); 1891 } 1892 EXTERN_C_END 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1896 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1897 { 1898 Mat mat; 1899 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1900 PetscErrorCode ierr; 1901 1902 PetscFunctionBegin; 1903 *newmat = 0; 1904 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1905 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1906 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1907 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1908 a = (Mat_MPIAIJ*)mat->data; 1909 1910 mat->factor = matin->factor; 1911 mat->rmap.bs = matin->rmap.bs; 1912 mat->assembled = PETSC_TRUE; 1913 mat->insertmode = NOT_SET_VALUES; 1914 mat->preallocated = PETSC_TRUE; 1915 1916 a->size = oldmat->size; 1917 a->rank = oldmat->rank; 1918 a->donotstash = oldmat->donotstash; 1919 a->roworiented = oldmat->roworiented; 1920 a->rowindices = 0; 1921 a->rowvalues = 0; 1922 a->getrowactive = PETSC_FALSE; 1923 1924 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1925 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1926 1927 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1928 if (oldmat->colmap) { 1929 #if defined (PETSC_USE_CTABLE) 1930 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1931 #else 1932 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1933 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1934 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1935 #endif 1936 } else a->colmap = 0; 1937 if (oldmat->garray) { 1938 PetscInt len; 1939 len = oldmat->B->cmap.n; 1940 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1941 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1942 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1943 } else a->garray = 0; 1944 1945 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1946 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1947 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1948 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1949 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1950 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1951 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1952 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1953 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1954 *newmat = mat; 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #include "petscsys.h" 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatLoad_MPIAIJ" 1962 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1963 { 1964 Mat A; 1965 PetscScalar *vals,*svals; 1966 MPI_Comm comm = ((PetscObject)viewer)->comm; 1967 MPI_Status status; 1968 PetscErrorCode ierr; 1969 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1970 PetscInt i,nz,j,rstart,rend,mmax; 1971 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1972 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1973 PetscInt cend,cstart,n,*rowners; 1974 int fd; 1975 1976 PetscFunctionBegin; 1977 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1978 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1979 if (!rank) { 1980 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1981 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1982 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1983 } 1984 1985 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1986 M = header[1]; N = header[2]; 1987 /* determine ownership of all rows */ 1988 m = M/size + ((M % size) > rank); 1989 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1990 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1991 1992 /* First process needs enough room for process with most rows */ 1993 if (!rank) { 1994 mmax = rowners[1]; 1995 for (i=2; i<size; i++) { 1996 mmax = PetscMax(mmax,rowners[i]); 1997 } 1998 } else mmax = m; 1999 2000 rowners[0] = 0; 2001 for (i=2; i<=size; i++) { 2002 mmax = PetscMax(mmax,rowners[i]); 2003 rowners[i] += rowners[i-1]; 2004 } 2005 rstart = rowners[rank]; 2006 rend = rowners[rank+1]; 2007 2008 /* distribute row lengths to all processors */ 2009 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2010 if (!rank) { 2011 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2012 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2013 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2014 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2015 for (j=0; j<m; j++) { 2016 procsnz[0] += ourlens[j]; 2017 } 2018 for (i=1; i<size; i++) { 2019 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2020 /* calculate the number of nonzeros on each 
processor */ 2021 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2022 procsnz[i] += rowlengths[j]; 2023 } 2024 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2025 } 2026 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2027 } else { 2028 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2029 } 2030 2031 if (!rank) { 2032 /* determine max buffer needed and allocate it */ 2033 maxnz = 0; 2034 for (i=0; i<size; i++) { 2035 maxnz = PetscMax(maxnz,procsnz[i]); 2036 } 2037 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2038 2039 /* read in my part of the matrix column indices */ 2040 nz = procsnz[0]; 2041 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2042 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2043 2044 /* read in every one elses and ship off */ 2045 for (i=1; i<size; i++) { 2046 nz = procsnz[i]; 2047 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2048 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2049 } 2050 ierr = PetscFree(cols);CHKERRQ(ierr); 2051 } else { 2052 /* determine buffer space needed for message */ 2053 nz = 0; 2054 for (i=0; i<m; i++) { 2055 nz += ourlens[i]; 2056 } 2057 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2058 2059 /* receive message of column indices*/ 2060 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2061 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2062 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2063 } 2064 2065 /* determine column ownership if matrix is not square */ 2066 if (N != M) { 2067 n = N/size + ((N % size) > rank); 2068 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2069 cstart = cend - n; 2070 } else { 2071 cstart = rstart; 2072 cend = rend; 2073 n = cend - cstart; 2074 } 2075 2076 /* loop over local rows, determining number of off diagonal entries */ 2077 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2078 jj = 0; 2079 for (i=0; i<m; i++) { 2080 for (j=0; j<ourlens[i]; j++) { 2081 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2082 jj++; 2083 } 2084 } 2085 2086 /* create our matrix */ 2087 for (i=0; i<m; i++) { 2088 ourlens[i] -= offlens[i]; 2089 } 2090 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2091 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2092 ierr = MatSetType(A,type);CHKERRQ(ierr); 2093 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2094 2095 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2096 for (i=0; i<m; i++) { 2097 ourlens[i] += offlens[i]; 2098 } 2099 2100 if (!rank) { 2101 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2102 2103 /* read in my part of the matrix numerical values */ 2104 nz = procsnz[0]; 2105 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2106 2107 /* insert into matrix */ 2108 jj = rstart; 2109 smycols = mycols; 2110 svals = vals; 2111 for (i=0; i<m; i++) { 2112 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2113 smycols += ourlens[i]; 2114 svals += ourlens[i]; 2115 jj++; 2116 } 2117 2118 /* read in other processors and ship out */ 2119 for (i=1; i<size; i++) { 2120 nz = procsnz[i]; 2121 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2122 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2123 } 2124 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2125 } else { 2126 /* receive numeric values */ 2127 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2128
2129     /* receive message of values */
2130     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2131     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2132     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2133
2134     /* insert into matrix */
2135     jj      = rstart;
2136     smycols = mycols;
2137     svals   = vals;
2138     for (i=0; i<m; i++) {
2139       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2140       smycols += ourlens[i];
2141       svals   += ourlens[i];
2142       jj++;
2143     }
2144   }
2145   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2146   ierr = PetscFree(vals);CHKERRQ(ierr);
2147   ierr = PetscFree(mycols);CHKERRQ(ierr);
2148   ierr = PetscFree(rowners);CHKERRQ(ierr);
2149
2150   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2151   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2152   *newmat = A;
2153   PetscFunctionReturn(0);
2154 }
2155
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2158 /*
2159     Not great since it makes two copies of the submatrix: first a SeqAIJ
2160   holding the local rows, and then the end result built by concatenating
2161   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2162 */
2163 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2164 {
2165   PetscErrorCode ierr;
2166   PetscMPIInt    rank,size;
2167   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2168   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2169   Mat            *local,M,Mreuse;
2170   PetscScalar    *vwork,*aa;
2171   MPI_Comm       comm = mat->comm;
2172   Mat_SeqAIJ     *aij;
2173
2174
2175   PetscFunctionBegin;
2176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2177   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2178
2179   if (call == MAT_REUSE_MATRIX) {
2180     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2181     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2182     local = &Mreuse;
2183     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2184   } else {
2185     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2186     Mreuse = *local;
2187     ierr = PetscFree(local);CHKERRQ(ierr);
2188   }
2189
2190   /*
2191       m - number of local rows
2192       n - number of columns (same on all processors)
2193       rstart - first row in new global matrix generated
2194   */
2195   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2196   if (call == MAT_INITIAL_MATRIX) {
2197     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2198     ii  = aij->i;
2199     jj  = aij->j;
2200
2201     /*
2202         Determine the number of non-zeros in the diagonal and off-diagonal
2203         portions of the matrix in order to do correct preallocation
2204     */
2205
2206     /* first get start and end of "diagonal" columns */
2207     if (csize == PETSC_DECIDE) {
2208       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2209       if (mglobal == n) { /* square matrix */
2210         nlocal = m;
2211       } else {
2212         nlocal = n/size + ((n % size) > rank);
2213       }
2214     } else {
2215       nlocal = csize;
2216     }
2217     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2218     rstart = rend - nlocal;
2219     if (rank == size - 1 && rend != n) {
2220       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2221     }
2222
2223     /* next, compute all the lengths */
2224     ierr =
PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2225 olens = dlens + m; 2226 for (i=0; i<m; i++) { 2227 jend = ii[i+1] - ii[i]; 2228 olen = 0; 2229 dlen = 0; 2230 for (j=0; j<jend; j++) { 2231 if (*jj < rstart || *jj >= rend) olen++; 2232 else dlen++; 2233 jj++; 2234 } 2235 olens[i] = olen; 2236 dlens[i] = dlen; 2237 } 2238 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2239 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2240 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2241 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2242 ierr = PetscFree(dlens);CHKERRQ(ierr); 2243 } else { 2244 PetscInt ml,nl; 2245 2246 M = *newmat; 2247 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2248 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2249 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2250 /* 2251 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2252 rather than the slower MatSetValues(). 2253 */ 2254 M->was_assembled = PETSC_TRUE; 2255 M->assembled = PETSC_FALSE; 2256 } 2257 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2258 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2259 ii = aij->i; 2260 jj = aij->j; 2261 aa = aij->a; 2262 for (i=0; i<m; i++) { 2263 row = rstart + i; 2264 nz = ii[i+1] - ii[i]; 2265 cwork = jj; jj += nz; 2266 vwork = aa; aa += nz; 2267 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2268 } 2269 2270 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2271 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2272 *newmat = M; 2273 2274 /* save submatrix used in processor for next request */ 2275 if (call == MAT_INITIAL_MATRIX) { 2276 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2277 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2278 } 2279 2280 PetscFunctionReturn(0); 2281 } 2282 2283 EXTERN_C_BEGIN 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2286 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2287 { 2288 PetscInt m,cstart, cend,j,nnz,i,d; 2289 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2290 const PetscInt *JJ; 2291 PetscScalar *values; 2292 PetscErrorCode ierr; 2293 2294 PetscFunctionBegin; 2295 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2296 2297 B->rmap.bs = B->cmap.bs = 1; 2298 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2299 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2300 m = B->rmap.n; 2301 cstart = B->cmap.rstart; 2302 cend = B->cmap.rend; 2303 rstart = B->rmap.rstart; 2304 2305 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2306 o_nnz = d_nnz + m; 2307 2308 for (i=0; i<m; i++) { 2309 nnz = I[i+1]- I[i]; 2310 JJ = J + I[i]; 2311 nnz_max = PetscMax(nnz_max,nnz); 2312 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2313 for (j=0; j<nnz; j++) { 2314 if (*JJ >= cstart) break; 2315 JJ++; 2316 } 2317 d = 0; 2318 for (; j<nnz; j++) { 2319 if (*JJ++ >= cend) break; 2320 d++; 2321 } 2322 d_nnz[i] = d; 2323 o_nnz[i] = nnz - d; 2324 } 2325 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2326 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2327 2328 if (v) values = (PetscScalar*)v; 2329 else { 2330 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 
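    /* no numerical values were supplied: fill a work array with explicit zeros and
       insert it row by row below, so the nonzero pattern described by I and J is
       still allocated in the matrix */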
2331 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2332 } 2333 2334 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2335 for (i=0; i<m; i++) { 2336 ii = i + rstart; 2337 nnz = I[i+1]- I[i]; 2338 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2339 } 2340 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2343 2344 if (!v) { 2345 ierr = PetscFree(values);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 EXTERN_C_END 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2353 /*@ 2354 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2355 (the default parallel PETSc format). 2356 2357 Collective on MPI_Comm 2358 2359 Input Parameters: 2360 + B - the matrix 2361 . i - the indices into j for the start of each local row (starts with zero) 2362 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2363 - v - optional values in the matrix 2364 2365 Level: developer 2366 2367 .keywords: matrix, aij, compressed row, sparse, parallel 2368 2369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2370 @*/ 2371 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2372 { 2373 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2374 2375 PetscFunctionBegin; 2376 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2377 if (f) { 2378 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2385 /*@C 2386 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2387 (the default parallel PETSc format). For good matrix assembly performance 2388 the user should preallocate the matrix storage by setting the parameters 2389 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2390 performance can be increased by more than a factor of 50. 2391 2392 Collective on MPI_Comm 2393 2394 Input Parameters: 2395 + A - the matrix 2396 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2397 (same value is used for all local rows) 2398 . d_nnz - array containing the number of nonzeros in the various rows of the 2399 DIAGONAL portion of the local submatrix (possibly different for each row) 2400 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2401 The size of this array is equal to the number of local rows, i.e 'm'. 2402 You must leave room for the diagonal entry even if it is zero. 2403 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2404 submatrix (same value is used for all local rows). 2405 - o_nnz - array containing the number of nonzeros in the various rows of the 2406 OFF-DIAGONAL portion of the local submatrix (possibly different for 2407 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2408 structure. The size of this array is equal to the number 2409 of local rows, i.e 'm'. 
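
   A call for the 8x8 example discussed below, as seen from proc0, might look like
   the following sketch (here comm stands for the communicator, and error checking
   with CHKERRQ() is omitted):
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};  /* the proc0 values from the example below */
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve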
2410
2411    If the *_nnz parameter is given then the *_nz parameter is ignored
2412
2413    The AIJ format (also called the Yale sparse matrix format or
2414    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2415    storage. The stored row and column indices begin with zero. See the users manual for details.
2416
2417    The parallel matrix is partitioned such that the first m0 rows belong to
2418    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2419    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2420
2421    The DIAGONAL portion of the local submatrix of a processor can be defined
2422    as the submatrix which is obtained by extracting the part corresponding
2423    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2424    first row that belongs to the processor, and r2 is the last row belonging
2425    to this processor. This is a square mxm matrix. The remaining portion
2426    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2427
2428    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2429
2430    Example usage:
2431
2432    Consider the following 8x8 matrix with 34 non-zero values, that is
2433    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2434    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2435    as follows:
2436
2437 .vb
2438             1  2  0  |  0  3  0  |  0  4
2439     Proc0   0  5  6  |  7  0  0  |  8  0
2440             9  0 10  | 11  0  0  | 12  0
2441     -------------------------------------
2442            13  0 14  | 15 16 17  |  0  0
2443     Proc1   0 18  0  | 19 20 21  |  0  0
2444             0  0  0  | 22 23  0  | 24  0
2445     -------------------------------------
2446     Proc2  25 26 27  |  0  0 28  | 29  0
2447            30  0  0  | 31 32 33  |  0 34
2448 .ve
2449
2450    This can be represented as a collection of submatrices as:
2451
2452 .vb
2453       A B C
2454       D E F
2455       G H I
2456 .ve
2457
2458    Where the submatrices A,B,C are owned by proc0, D,E,F are
2459    owned by proc1, and G,H,I are owned by proc2.
2460
2461    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2462    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2463    The 'M','N' parameters are 8,8, and have the same values on all procs.
2464
2465    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2466    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2467    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2468    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2469    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
2470    matrix and [DF] as another SeqAIJ matrix.
2471
2472    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2473    allocated for every row of the local diagonal submatrix, and o_nz
2474    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2475    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2476    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2477    In this case, the values of d_nz,o_nz are:
2478 .vb
2479      proc0 : d_nz = 2, o_nz = 2
2480      proc1 : d_nz = 3, o_nz = 2
2481      proc2 : d_nz = 1, o_nz = 4
2482 .ve
2483    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2484    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2485    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2486    34 values.
2487
2488    When the d_nnz, o_nnz parameters are specified, the storage is specified
2489    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2490    In the above case the values for d_nnz,o_nnz are:
2491 .vb
2492      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2493      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2494      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2495 .ve
2496    Here the space allocated is the sum of all of the above values, i.e. 34, and
2497    hence the preallocation is perfect.
2498
2499    Level: intermediate
2500
2501 .keywords: matrix, aij, compressed row, sparse, parallel
2502
2503 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2504           MPIAIJ
2505 @*/
2506 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2507 {
2508   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2509
2510   PetscFunctionBegin;
2511   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2512   if (f) {
2513     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2514   }
2515   PetscFunctionReturn(0);
2516 }
2517
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatCreateMPIAIJ"
2520 /*@C
2521    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2522    (the default parallel PETSc format).  For good matrix assembly performance
2523    the user should preallocate the matrix storage by setting the parameters
2524    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2525    performance can be increased by more than a factor of 50.
2526
2527    Collective on MPI_Comm
2528
2529    Input Parameters:
2530 +  comm - MPI communicator
2531 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2532        This value should be the same as the local size used in creating the
2533        y vector for the matrix-vector product y = Ax.
2534 .  n - This value should be the same as the local size used in creating the
2535        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2536        calculated if N is given). For square matrices n is almost always m.
2537 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2538 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2539 .  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
2540           (same value is used for all local rows)
2541 .  d_nnz - array containing the number of nonzeros in the various rows of the
2542            DIAGONAL portion of the local submatrix (possibly different for each row)
2543            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2544            The size of this array is equal to the number of local rows, i.e. 'm'.
2545            You must leave room for the diagonal entry even if it is zero.
2546 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
2547           submatrix (same value is used for all local rows).
2548 -  o_nnz - array containing the number of nonzeros in the various rows of the
2549            OFF-DIAGONAL portion of the local submatrix (possibly different for
2550            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2551            structure. The size of this array is equal to the number
2552            of local rows, i.e. 'm'.
2553
2554    Output Parameter:
2555 .  A - the matrix
2556
2557    Notes:
2558    If the *_nnz parameter is given then the *_nz parameter is ignored
2559
2560    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
2561    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2562    storage requirements for this matrix.
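
   For instance, a square matrix with m local rows and columns per process, roughly
   5 nonzeros per row in the diagonal block and 2 per row in the off-diagonal block,
   could be created with the following sketch (comm and m are assumed to be set
   already; error checking is omitted):
.vb
     Mat A;
     MatCreateMPIAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve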
2563
2564    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2565    processor then it must be used on all processors that share the object for
2566    that argument.
2567
2568    The user MUST specify either the local or global matrix dimensions
2569    (possibly both).
2570
2571    The parallel matrix is partitioned such that the first m0 rows belong to
2572    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2573    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2574
2575    The DIAGONAL portion of the local submatrix of a processor can be defined
2576    as the submatrix which is obtained by extracting the part corresponding
2577    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2578    first row that belongs to the processor, and r2 is the last row belonging
2579    to this processor. This is a square mxm matrix. The remaining portion
2580    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2581
2582    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2583
2584    When calling this routine with a single process communicator, a matrix of
2585    type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
2586    type of communicator, use the construction mechanism:
2587      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2588
2589    By default, this format uses inodes (identical nodes) when possible.
2590    We search for consecutive rows with the same nonzero structure, thereby
2591    reusing matrix information to achieve increased efficiency.
2592
2593    Options Database Keys:
2594 +  -mat_no_inode - Do not use inodes
2595 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2596 -  -mat_aij_oneindex - Internally use indexing starting at 1
2597         rather than 0.  Note that when calling MatSetValues(),
2598         the user still MUST index entries starting at 0!
2599
2600
2601    Example usage:
2602
2603    Consider the following 8x8 matrix with 34 non-zero values, that is
2604    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2605    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2606    as follows:
2607
2608 .vb
2609             1  2  0  |  0  3  0  |  0  4
2610     Proc0   0  5  6  |  7  0  0  |  8  0
2611             9  0 10  | 11  0  0  | 12  0
2612     -------------------------------------
2613            13  0 14  | 15 16 17  |  0  0
2614     Proc1   0 18  0  | 19 20 21  |  0  0
2615             0  0  0  | 22 23  0  | 24  0
2616     -------------------------------------
2617     Proc2  25 26 27  |  0  0 28  | 29  0
2618            30  0  0  | 31 32 33  |  0 34
2619 .ve
2620
2621    This can be represented as a collection of submatrices as:
2622
2623 .vb
2624       A B C
2625       D E F
2626       G H I
2627 .ve
2628
2629    Where the submatrices A,B,C are owned by proc0, D,E,F are
2630    owned by proc1, and G,H,I are owned by proc2.
2631
2632    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2633    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2634    The 'M','N' parameters are 8,8, and have the same values on all procs.
2635
2636    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2637    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2638    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2639    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2640    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
2641    matrix and [DF] as another SeqAIJ matrix.
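
   The two blocks, and the mapping from the columns of the OFF-DIAGONAL block
   back to global column numbers, can be retrieved with MatMPIAIJGetSeqAIJ()
   (defined later in this file); a sketch, for an assembled MPIAIJ matrix A:
.vb
     Mat      Ad,Ao;
     PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);  /* Ad = DIAGONAL block, Ao = OFF-DIAGONAL block */
.ve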
2642
2643    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2644    allocated for every row of the local diagonal submatrix, and o_nz
2645    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2646    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2647    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2648    In this case, the values of d_nz,o_nz are:
2649 .vb
2650      proc0 : d_nz = 2, o_nz = 2
2651      proc1 : d_nz = 3, o_nz = 2
2652      proc2 : d_nz = 1, o_nz = 4
2653 .ve
2654    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2655    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2656    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2657    34 values.
2658
2659    When the d_nnz, o_nnz parameters are specified, the storage is specified
2660    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2661    In the above case the values for d_nnz,o_nnz are:
2662 .vb
2663      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2664      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2665      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2666 .ve
2667    Here the space allocated is the sum of all of the above values, i.e. 34, and
2668    hence the preallocation is perfect.
2669
2670    Level: intermediate
2671
2672 .keywords: matrix, aij, compressed row, sparse, parallel
2673
2674 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2675           MPIAIJ
2676 @*/
2677 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2678 {
2679   PetscErrorCode ierr;
2680   PetscMPIInt    size;
2681
2682   PetscFunctionBegin;
2683   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2684   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2685   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2686   if (size > 1) {
2687     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2688     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2689   } else {
2690     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2691     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2692   }
2693   PetscFunctionReturn(0);
2694 }
2695
2696 #undef __FUNCT__
2697 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2698 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701
2702   PetscFunctionBegin;
2703   *Ad     = a->A;
2704   *Ao     = a->B;
2705   *colmap = a->garray;
2706   PetscFunctionReturn(0);
2707 }
2708
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2711 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2712 {
2713   PetscErrorCode ierr;
2714   PetscInt       i;
2715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2716
2717   PetscFunctionBegin;
2718   if (coloring->ctype == IS_COLORING_LOCAL) {
2719     ISColoringValue *allcolors,*colors;
2720     ISColoring      ocoloring;
2721
2722     /* set coloring for diagonal portion */
2723     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2724
2725     /* set coloring for off-diagonal portion */
2726     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2727     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2728     for (i=0; i<a->B->cmap.n; i++) {
2729       colors[i] = allcolors[a->garray[i]];
2730     }
2731     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2732     ierr =
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2733     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2734     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2735   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2736     ISColoringValue *colors;
2737     PetscInt        *larray;
2738     ISColoring      ocoloring;
2739
2740     /* set coloring for diagonal portion */
2741     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2742     for (i=0; i<a->A->cmap.n; i++) {
2743       larray[i] = i + A->cmap.rstart;
2744     }
2745     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2746     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2747     for (i=0; i<a->A->cmap.n; i++) {
2748       colors[i] = coloring->colors[larray[i]];
2749     }
2750     ierr = PetscFree(larray);CHKERRQ(ierr);
2751     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2752     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2753     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2754
2755     /* set coloring for off-diagonal portion */
2756     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2757     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2758     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2759     for (i=0; i<a->B->cmap.n; i++) {
2760       colors[i] = coloring->colors[larray[i]];
2761     }
2762     ierr = PetscFree(larray);CHKERRQ(ierr);
2763     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2764     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2765     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2766   } else {
2767     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
2768   }
2769
2770   PetscFunctionReturn(0);
2771 }
2772
2773 #if defined(PETSC_HAVE_ADIC)
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2776 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2777 {
2778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2779   PetscErrorCode ierr;
2780
2781   PetscFunctionBegin;
2782   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2783   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 #endif
2787
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2790 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2791 {
2792   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2793   PetscErrorCode ierr;
2794
2795   PetscFunctionBegin;
2796   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2797   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatMerge"
2803 /*@C
2804       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2805                  matrices from each processor
2806
2807     Collective on MPI_Comm
2808
2809    Input Parameters:
2810 +    comm - the communicator the parallel matrix will live on
2811 .    inmat - the input sequential matrix (each process provides its own)
2812 .    n - number of local columns (or PETSC_DECIDE)
2813 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2814
2815    Output Parameter:
2816 .    outmat - the parallel matrix generated
2817
2818     Level: advanced
2819
2820    Notes: The number of columns of the matrix on EACH processor MUST be the same.
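
   Example usage (a minimal sketch, assuming every process has already built a
   sequential matrix localmat with the same number of columns; error checking
   is omitted):
.vb
     Mat merged;
     MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);
     /* note: the current implementation destroys localmat as part of the merge */
.ve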
2821 2822 @*/ 2823 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2824 { 2825 PetscErrorCode ierr; 2826 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2827 PetscInt *indx; 2828 PetscScalar *values; 2829 2830 PetscFunctionBegin; 2831 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2832 if (scall == MAT_INITIAL_MATRIX){ 2833 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2834 if (n == PETSC_DECIDE){ 2835 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2836 } 2837 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2838 rstart -= m; 2839 2840 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2841 for (i=0;i<m;i++) { 2842 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2843 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2844 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2845 } 2846 /* This routine will ONLY return MPIAIJ type matrix */ 2847 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2848 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2849 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2850 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2851 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2852 2853 } else if (scall == MAT_REUSE_MATRIX){ 2854 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2855 } else { 2856 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2857 } 2858 2859 for (i=0;i<m;i++) { 2860 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2861 I = i + rstart; 2862 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2863 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2864 } 2865 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2866 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2868 2869 PetscFunctionReturn(0); 2870 } 2871 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatFileSplit" 2874 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2875 { 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank; 2878 PetscInt m,N,i,rstart,nnz; 2879 size_t len; 2880 const PetscInt *indx; 2881 PetscViewer out; 2882 char *name; 2883 Mat B; 2884 const PetscScalar *values; 2885 2886 PetscFunctionBegin; 2887 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2888 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2889 /* Should this be the type of the diagonal block of A? 
*/
2890   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2891   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2892   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2893   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2894   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2895   for (i=0;i<m;i++) {
2896     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2897     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2898     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2899   }
2900   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2901   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2902
2903   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2904   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2905   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2906   sprintf(name,"%s.%d",outfile,rank);
2907   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2908   ierr = PetscFree(name);CHKERRQ(ierr);
2909   ierr = MatView(B,out);CHKERRQ(ierr);
2910   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2911   ierr = MatDestroy(B);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914
2915 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2918 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2919 {
2920   PetscErrorCode       ierr;
2921   Mat_Merge_SeqsToMPI  *merge;
2922   PetscObjectContainer container;
2923
2924   PetscFunctionBegin;
2925   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2926   if (container) {
2927     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2928     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2929     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2930     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2931     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2932     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2933     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2934     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2935     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2936     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2937     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2938     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
2939
2940     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2941     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2942     /* free the merge struct only when a container was actually attached; otherwise
2943        merge is uninitialized and freeing it would be an error */
       ierr = PetscFree(merge);CHKERRQ(ierr);
  }
2944
2945   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948
2949 #include "src/mat/utils/freespace.h"
2950 #include "petscbt.h"
2951 static PetscEvent logkey_seqstompinum = 0;
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2954 /*@C
2955       MatMerge_SeqsToMPINumeric - Fills an MPIAIJ matrix by adding together the
2956                  sequential matrices from each processor; the parallel nonzero
2957                  structure must already have been created with
2958                  MatMerge_SeqsToMPISymbolic()
2959
2960     Collective on MPI_Comm
2961
2962    Input Parameters:
2963 +    seqmat - the input sequential matrix
2964 -    mpimat - the parallel matrix whose structure was generated by MatMerge_SeqsToMPISymbolic()
2965
2966     Level: advanced
2967
2968    Notes:
2969      The dimensions of the sequential matrix on each processor MUST be the same.
2974      The input seqmat is kept in the container "Mat_Merge_SeqsToMPI" and will be
2975      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2976 @*/
2977 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2978 {
2979   PetscErrorCode       ierr;
2980   MPI_Comm             comm=mpimat->comm;
2981   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2982   PetscMPIInt          size,rank,taga,*len_s;
2983   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
2984   PetscInt             proc,m;
2985   PetscInt             **buf_ri,**buf_rj;
2986   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2987   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2988   MPI_Request          *s_waits,*r_waits;
2989   MPI_Status           *status;
2990   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2991   Mat_Merge_SeqsToMPI  *merge;
2992   PetscObjectContainer container;
2993
2994   PetscFunctionBegin;
2995   if (!logkey_seqstompinum) {
2996     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2997   }
2998   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2999
3000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3001   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3002
3003   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3004   if (container) {
3005     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3006   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created with MatMerge_SeqsToMPISymbolic()");
3007   bi     = merge->bi;
3008   bj     = merge->bj;
3009   buf_ri = merge->buf_ri;
3010   buf_rj = merge->buf_rj;
3011
3012   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3013   owners = merge->rowmap.range;
3014   len_s  = merge->len_s;
3015
3016   /* send and recv matrix values */
3017   /*-----------------------------*/
3018   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3019   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3020
3021   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3022   for (proc=0,k=0; proc<size; proc++){
3023     if (!len_s[proc]) continue;
3024     i = owners[proc];
3025     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3026     k++;
3027   }
3028
3029   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3030   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3031   ierr = PetscFree(status);CHKERRQ(ierr);
3032
3033   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3034   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3035
3036   /* insert mat values of mpimat */
3037   /*----------------------------*/
3038   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3039   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3040   nextrow = buf_ri_k + merge->nrecv;
3041   nextai  = nextrow + merge->nrecv;
3042
3043   for (k=0; k<merge->nrecv; k++){
3044     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3045     nrows       = *(buf_ri_k[k]);
3046     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3047     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3048   }
3049
3050   /* set values of ba */
3051   m = merge->rowmap.n;
3052   for (i=0; i<m; i++) {
3053     arow = owners[rank] + i;
3054     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3055     bnzi = bi[i+1] - bi[i];
3056     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3057
3058     /* add local non-zero vals of this proc's seqmat into ba */
3059     anzi =
ai[arow+1] - ai[arow]; 3060 aj = a->j + ai[arow]; 3061 aa = a->a + ai[arow]; 3062 nextaj = 0; 3063 for (j=0; nextaj<anzi; j++){ 3064 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3065 ba_i[j] += aa[nextaj++]; 3066 } 3067 } 3068 3069 /* add received vals into ba */ 3070 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3071 /* i-th row */ 3072 if (i == *nextrow[k]) { 3073 anzi = *(nextai[k]+1) - *nextai[k]; 3074 aj = buf_rj[k] + *(nextai[k]); 3075 aa = abuf_r[k] + *(nextai[k]); 3076 nextaj = 0; 3077 for (j=0; nextaj<anzi; j++){ 3078 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3079 ba_i[j] += aa[nextaj++]; 3080 } 3081 } 3082 nextrow[k]++; nextai[k]++; 3083 } 3084 } 3085 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3086 } 3087 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3088 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3089 3090 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3091 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3092 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3093 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 static PetscEvent logkey_seqstompisym = 0; 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3100 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3101 { 3102 PetscErrorCode ierr; 3103 Mat B_mpi; 3104 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3105 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3106 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3107 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3108 PetscInt len,proc,*dnz,*onz; 3109 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3110 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3111 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3112 MPI_Status *status; 3113 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3114 PetscBT lnkbt; 3115 Mat_Merge_SeqsToMPI *merge; 3116 PetscObjectContainer container; 3117 3118 PetscFunctionBegin; 3119 if (!logkey_seqstompisym) { 3120 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3121 } 3122 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3123 3124 /* make sure it is a PETSc comm */ 3125 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3126 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3127 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3128 3129 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3130 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3131 3132 /* determine row ownership */ 3133 /*---------------------------------------------------------*/ 3134 merge->rowmap.n = m; 3135 merge->rowmap.N = M; 3136 merge->rowmap.bs = 1; 3137 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3138 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3139 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3140 3141 m = merge->rowmap.n; 3142 M = merge->rowmap.N; 3143 owners = merge->rowmap.range; 3144 3145 /* determine the number of messages to send, their lengths */ 3146 /*---------------------------------------------------------*/ 3147 len_s = merge->len_s; 3148 3149 len = 0; /* length of buf_si[] */ 3150 merge->nsend = 0; 3151 for (proc=0; proc<size; proc++){ 3152 len_si[proc] = 0; 3153 if (proc == rank){ 3154 
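/* nothing is sent to the local process itself; its rows of seqmat are merged in directly */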

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local to the receiving process)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);
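  /*
     Illustrative i-structure message (as packed above): for two nonempty
     owner-local rows 1 and 4 with 3 and 2 nonzeros, the outgoing buffer is
         buf_si = { 2,  1, 4,  0, 3, 5 }
     i.e. nrows, then the local row indices, then the cumulative nonzero
     counts.
  */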

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
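/*
   Usage sketch (hypothetical names; assumes every process holds a sequential
   matrix seqA spanning the global column space, e.g. a partial sum of a
   matrix assembled redundantly).  The symbolic phase runs only once; a reuse
   call repeats just the numeric merge:

       Mat seqA,mpiA;
       ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
       (change the numerical values of seqA, keeping the nonzero pattern)
       ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpiA);CHKERRQ(ierr);
*/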
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
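/*
   Usage sketch for MatGetLocalMat() (hypothetical names):

       Mat Aloc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
       (update the values, but not the nonzero pattern, of A)
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
       ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/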

static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap.rstart; end = A->rmap.rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
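/*
   Usage sketch for MatGetLocalMatCondensed() (hypothetical names); passing
   PETSC_NULL for row and col lets the routine build the index sets from the
   local ownership range and the nonzero columns:

       Mat Aloc;
       ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
*/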

static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which the next B->rmap.n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
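/*
   Usage sketch for MatGetBrowsOfAcols() (hypothetical names); the returned
   index sets must be kept by the caller and passed back on a reuse call:

       IS       rowb,colb;
       PetscInt brstart;
       Mat      Bseq;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
       (new values in B, same nonzero pattern)
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/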

static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* buffer for the lengths of the incoming rows */
  svalues  = gen_to->values;   /* buffer for the lengths of the outgoing rows */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;
  rwaits   = gen_from->requests;
  swaits   = gen_to->requests;
  srow     = gen_to->indices;  /* local row index to be sent */
  rstarts  = gen_from->starts;
  sstarts  = gen_to->starts;
  rprocs   = gen_from->procs;
  sprocs   = gen_to->procs;
  sstatus  = gen_to->sstatus;
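  /*
     The exchange below proceeds in three phases, reusing the communication
     pattern of the matrix-vector scatter ctx: first the row lengths
     (i-structure), then the column indices (j-array), and finally the
     numerical values (a-array).  The first two phases are needed only for
     MAT_INITIAL_MATRIX; a MAT_REUSE_MATRIX call replays just the value
     exchange using the offsets and buffer saved in startsj and bufa_ptr.
  */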
  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj = sstartsj + nsends +1;
    sstartsj[0] = 0;  rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap.range[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends +1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
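/*
   Usage sketch for MatGetBrowsOfAoCols() (hypothetical names); startsj and
   bufa must start out PETSC_NULL and be kept by the caller for reuse calls:

       PetscInt    *startsj = PETSC_NULL;
       PetscScalar *bufa    = PETSC_NULL;
       Mat         Both;
       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
       (new values in B, same nonzero pattern)
       ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
*/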

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2)
  PetscValidPointer(colmap, 3)
  PetscValidPointer(multScatter, 4)
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
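/*
   Usage sketch for MatGetCommunicationStructs() (hypothetical names); note
   that the type of colmap depends on whether PETSC_USE_CTABLE is defined:

       Vec        lvec;
       VecScatter scat;
   #if defined (PETSC_USE_CTABLE)
       PetscTable colmap;
   #else
       PetscInt   *colmap;
   #endif
       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
*/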

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr         = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->rmap.bs   = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed during assembly */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
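/*
   Usage sketch (hypothetical sizes m,n,M,N): the usual way to obtain a
   matrix of this type from user code:

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/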

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt    i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt    cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END