#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "../src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
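
/*
   Usage sketch for the routine above (illustrative only, not from the original source).
   Rank 0 holds an assembled sequential AIJ matrix; every process passes the number of
   local rows it wants.  The names gseq, dist, N, nnz and mlocal are hypothetical.

     Mat gseq = PETSC_NULL,dist;
     if (!rank) {
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,0,nnz,&gseq);CHKERRQ(ierr);
       ... fill gseq with MatSetValues() and assemble it ...
     }
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... later, to refresh only the numerical values from process 0 ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/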

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each processor
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
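
/*
   The two macros above both implement a sorted-row insertion: narrow the window by
   bisection, scan for the column, update in place if it exists, otherwise shift the
   tail of the row up by one and insert.  A stand-alone sketch of that pattern on a
   plain sorted row (illustrative only; InsertSorted is a hypothetical helper, not
   part of the library):

     static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *len,PetscInt col,PetscScalar v,InsertMode addv)
     {
       PetscInt lo = 0,hi = *len,t,i,j;
       while (hi - lo > 5) {
         t = (lo + hi)/2;
         if (cols[t] > col) hi = t; else lo = t;
       }
       for (i=lo; i<hi; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += v; else vals[i] = v;
           return;
         }
       }
       for (j=*len; j>i; j--) { cols[j] = cols[j-1]; vals[j] = vals[j-1]; }
       cols[i] = col; vals[i] = v; (*len)++;
     }
*/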

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
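
/*
   Caller-side sketch for the assembly pair above (illustrative, not from the original
   source): any process may set any global entry; values for rows owned elsewhere are
   stashed by MatSetValues_MPIAIJ() and delivered by MatAssemblyBegin/End_MPIAIJ().
   Here row, col and val are hypothetical.

     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/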

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
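
/*
   Added summary (not from the original source): MatMult_MPIAIJ() and MatMultAdd_MPIAIJ()
   overlap communication with computation as

     ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
     ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);

   i.e. y = A_d x_local + A_o x_ghost, where A_d = a->A is the diagonal block, A_o = a->B
   is the off-diagonal block and a->lvec holds the gathered ghost values.  The transpose
   routines run the same scatter in SCATTER_REVERSE with ADD_VALUES to return the
   off-diagonal contributions to their owning processes.
*/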

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
               rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
               rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
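
/*
   Caller-side sketch for the two routines above (illustrative, not from the original
   source): each MatGetRow() must be paired with a MatRestoreRow() before the next row
   is requested, and only locally owned rows may be asked for.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols, vals ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/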
MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1542 *norm = sqrt(*norm); 1543 } else if (type == NORM_1) { /* max column norm */ 1544 PetscReal *tmp,*tmp2; 1545 PetscInt *jj,*garray = aij->garray; 1546 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1547 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1548 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1549 *norm = 0.0; 1550 v = amat->a; jj = amat->j; 1551 for (j=0; j<amat->nz; j++) { 1552 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1553 } 1554 v = bmat->a; jj = bmat->j; 1555 for (j=0; j<bmat->nz; j++) { 1556 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1557 } 1558 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1559 for (j=0; j<mat->cmap->N; j++) { 1560 if (tmp2[j] > *norm) *norm = tmp2[j]; 1561 } 1562 ierr = PetscFree(tmp);CHKERRQ(ierr); 1563 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1564 } else if (type == NORM_INFINITY) { /* max row norm */ 1565 PetscReal ntemp = 0.0; 1566 for (j=0; j<aij->A->rmap->n; j++) { 1567 v = amat->a + amat->i[j]; 1568 sum = 0.0; 1569 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1570 sum += PetscAbsScalar(*v); v++; 1571 } 1572 v = bmat->a + bmat->i[j]; 1573 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1574 sum += PetscAbsScalar(*v); v++; 1575 } 1576 if (sum > ntemp) ntemp = sum; 1577 } 1578 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1581 } 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatTranspose_MPIAIJ" 1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1589 { 1590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1591 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1592 PetscErrorCode ierr; 1593 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1594 PetscInt cstart=A->cmap->rstart,ncol; 1595 Mat B; 1596 MatScalar *array; 1597 1598 PetscFunctionBegin; 1599 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1600 1601 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1602 ai = Aloc->i; aj = Aloc->j; 1603 bi = Bloc->i; bj = Bloc->j; 1604 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1605 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1606 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1607 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1608 for (i=0; i<ai[ma]; i++){ 1609 d_nnz[aj[i]] ++; 1610 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1611 } 1612 1613 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1614 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1615 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1616 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1617 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1618 } else { 1619 B = *matout; 1620 } 1621 1622 /* copy over the A part */ 1623 array = Aloc->a; 1624 row = A->rmap->rstart; 1625 for (i=0; i<ma; i++) { 1626 ncol = ai[i+1]-ai[i]; 1627 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1628 row++; array += ncol; aj += ncol; 1629 } 1630 aj = Aloc->j; 1631 for (i=0; i<ai[ma]; i++) aj[i] -= 
cstart; /* resume local col index */ 1632 1633 /* copy over the B part */ 1634 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1635 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1636 array = Bloc->a; 1637 row = A->rmap->rstart; 1638 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1639 cols_tmp = cols; 1640 for (i=0; i<mb; i++) { 1641 ncol = bi[i+1]-bi[i]; 1642 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1643 row++; array += ncol; cols_tmp += ncol; 1644 } 1645 ierr = PetscFree(cols);CHKERRQ(ierr); 1646 1647 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1650 *matout = B; 1651 } else { 1652 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1662 Mat a = aij->A,b = aij->B; 1663 PetscErrorCode ierr; 1664 PetscInt s1,s2,s3; 1665 1666 PetscFunctionBegin; 1667 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1668 if (rr) { 1669 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1670 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1671 /* Overlap communication with computation. */ 1672 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 } 1674 if (ll) { 1675 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1676 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1677 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1678 } 1679 /* scale the diagonal block */ 1680 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1681 1682 if (rr) { 1683 /* Do a scatter end and then right scale the off-diagonal block */ 1684 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1686 } 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1700 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1706 { 1707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatEqual_MPIAIJ" 1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1718 { 1719 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1720 Mat a,b,c,d; 1721 PetscTruth flg; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 a = matA->A; b = matA->B; 1726 c = matB->A; d = matB->B; 1727 1728 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1729 if (flg) { 1730 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1731 } 1732 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ 
"MatCopy_MPIAIJ" 1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1739 { 1740 PetscErrorCode ierr; 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1742 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1743 1744 PetscFunctionBegin; 1745 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1746 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1747 /* because of the column compression in the off-processor part of the matrix a->B, 1748 the number of columns in a->B and b->B may be different, hence we cannot call 1749 the MatCopy() directly on the two parts. If need be, we can provide a more 1750 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1751 then copying the submatrices */ 1752 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1755 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #include "petscblaslapack.h" 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatAXPY_MPIAIJ" 1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 PetscInt i; 1778 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqAIJ *)xx->A->data; 1786 y = (Mat_SeqAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqAIJ *)xx->B->data; 1790 y = (Mat_SeqAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else if (str == SUBSET_NONZERO_PATTERN) { 1794 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1795 1796 x = (Mat_SeqAIJ *)xx->B->data; 1797 y = (Mat_SeqAIJ *)yy->B->data; 1798 if (y->xtoy && y->XtoY != xx->B) { 1799 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1800 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1801 } 1802 if (!y->xtoy) { /* get xtoy */ 1803 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1804 y->XtoY = xx->B; 1805 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1806 } 1807 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1808 } else { 1809 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatConjugate_MPIAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1819 { 1820 #if defined(PETSC_USE_COMPLEX) 1821 PetscErrorCode ierr; 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 1824 PetscFunctionBegin; 1825 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1826 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1827 #else 1828 PetscFunctionBegin; 1829 #endif 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatRealPart_MPIAIJ" 1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1836 { 1837 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth row_identity, col_identity;
  PetscContainer c;
  PetscInt m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU
factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1931 */ 1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1933 { 1934 namespace graph_dist = boost::graph::distributed; 1935 1936 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1937 lgraph_type* lgraph_p; 1938 PetscContainer c; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1943 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1944 ierr = VecCopy(b, x);CHKERRQ(ierr); 1945 1946 PetscScalar* array_x; 1947 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1948 PetscInt sx; 1949 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1950 1951 PetscScalar* array_b; 1952 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1953 PetscInt sb; 1954 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1955 1956 lgraph_type& level_graph = *lgraph_p; 1957 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1958 1959 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1960 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1961 ref_x(array_x, boost::extents[num_vertices(graph)]); 1962 1963 typedef boost::iterator_property_map<array_ref_type::iterator, 1964 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1965 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1966 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1967 1968 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1969 1970 PetscFunctionReturn(0); 1971 } 1972 #endif 1973 1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1975 PetscInt nzlocal,nsends,nrecvs; 1976 PetscMPIInt *send_rank; 1977 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1978 PetscScalar *sbuf_a,**rbuf_a; 1979 PetscErrorCode (*MatDestroy)(Mat); 1980 } Mat_Redundant; 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1985 { 1986 PetscErrorCode ierr; 1987 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1988 PetscInt i; 1989 1990 PetscFunctionBegin; 1991 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1992 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1993 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1994 for (i=0; i<redund->nrecvs; i++){ 1995 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1996 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1997 } 1998 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1999 ierr = PetscFree(redund);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatDestroy_MatRedundant" 2005 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2006 { 2007 PetscErrorCode ierr; 2008 PetscContainer container; 2009 Mat_Redundant *redund=PETSC_NULL; 2010 2011 PetscFunctionBegin; 2012 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2013 if (container) { 2014 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2015 } else { 2016 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2017 } 2018 A->ops->destroy = redund->MatDestroy; 2019 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2020 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2021 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ 
"MatGetRedundantMatrix_MPIAIJ" 2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2028 { 2029 PetscMPIInt rank,size; 2030 MPI_Comm comm=((PetscObject)mat)->comm; 2031 PetscErrorCode ierr; 2032 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2033 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2034 PetscInt *rowrange=mat->rmap->range; 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat A=aij->A,B=aij->B,C=*matredundant; 2037 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2038 PetscScalar *sbuf_a; 2039 PetscInt nzlocal=a->nz+b->nz; 2040 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2041 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2042 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2043 MatScalar *aworkA,*aworkB; 2044 PetscScalar *vals; 2045 PetscMPIInt tag1,tag2,tag3,imdex; 2046 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2047 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2048 MPI_Status recv_status,*send_status; 2049 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2050 PetscInt **rbuf_j=PETSC_NULL; 2051 PetscScalar **rbuf_a=PETSC_NULL; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 PetscContainer container; 2054 2055 PetscFunctionBegin; 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2058 2059 if (reuse == MAT_REUSE_MATRIX) { 2060 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2061 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2062 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2063 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2064 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2065 if (container) { 2066 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2067 } else { 2068 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2069 } 2070 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2071 2072 nsends = redund->nsends; 2073 nrecvs = redund->nrecvs; 2074 send_rank = redund->send_rank; recv_rank = send_rank + size; 2075 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2076 sbuf_j = redund->sbuf_j; 2077 sbuf_a = redund->sbuf_a; 2078 rbuf_j = redund->rbuf_j; 2079 rbuf_a = redund->rbuf_a; 2080 } 2081 2082 if (reuse == MAT_INITIAL_MATRIX){ 2083 PetscMPIInt subrank,subsize; 2084 PetscInt nleftover,np_subcomm; 2085 /* get the destination processors' id send_rank, nsends and nrecvs */ 2086 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2087 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2088 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2089 recv_rank = send_rank + size; 2090 np_subcomm = size/nsubcomm; 2091 nleftover = size - nsubcomm*np_subcomm; 2092 nsends = 0; nrecvs = 0; 2093 for (i=0; i<size; i++){ /* i=rank*/ 2094 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2095 send_rank[nsends] = i; nsends++; 2096 recv_rank[nrecvs++] = i; 2097 } 2098 } 2099 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2100 i = size-nleftover-1; 2101 j = 0; 2102 while (j < nsubcomm - nleftover){ 2103 send_rank[nsends++] = i; 2104 i--; j++; 2105 } 2106 } 2107 2108 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2109 for (i=0; i<nleftover; i++){ 2110 recv_rank[nrecvs++] = size-nleftover+i; 2111 } 2112 } 2113 2114 /* allocate sbuf_j, sbuf_a */ 2115 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2116 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2117 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2118 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2119 2120 /* copy mat's local entries into the buffers */ 2121 if (reuse == MAT_INITIAL_MATRIX){ 2122 rownz_max = 0; 2123 rptr = sbuf_j; 2124 cols = sbuf_j + rend-rstart + 1; 2125 vals = sbuf_a; 2126 rptr[0] = 0; 2127 for (i=0; i<rend-rstart; i++){ 2128 row = i + rstart; 2129 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2130 ncols = nzA + nzB; 2131 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2132 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2133 /* load the column indices for this row into cols */ 2134 lwrite = 0; 2135 for (l=0; l<nzB; l++) { 2136 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2137 vals[lwrite] = aworkB[l]; 2138 cols[lwrite++] = ctmp; 2139 } 2140 } 2141 for (l=0; l<nzA; l++){ 2142 vals[lwrite] = aworkA[l]; 2143 cols[lwrite++] = cstart + cworkA[l]; 2144 } 2145 for (l=0; l<nzB; l++) { 2146 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2147 vals[lwrite] = aworkB[l]; 2148 cols[lwrite++] = ctmp; 2149 } 2150 } 2151 vals += ncols; 2152 cols += ncols; 2153 rptr[i+1] = rptr[i] + ncols; 2154 if (rownz_max < ncols) rownz_max = ncols; 2155 } 2156 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2157 } else { /* only copy matrix values into sbuf_a */ 2158 rptr = sbuf_j; 2159 vals = sbuf_a; 2160 rptr[0] = 0; 2161 for (i=0; i<rend-rstart; i++){ 2162 row = i + rstart; 2163 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2164 ncols = nzA + nzB; 2165 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2166 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2167 lwrite = 0; 2168 for (l=0; l<nzB; l++) { 2169 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2170 } 2171 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2172 for (l=0; l<nzB; l++) { 
2173 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2174 } 2175 vals += ncols; 2176 rptr[i+1] = rptr[i] + ncols; 2177 } 2178 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2179 2180 /* send nzlocal to others, and recv other's nzlocal */ 2181 /*--------------------------------------------------*/ 2182 if (reuse == MAT_INITIAL_MATRIX){ 2183 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2184 s_waits2 = s_waits3 + nsends; 2185 s_waits1 = s_waits2 + nsends; 2186 r_waits1 = s_waits1 + nsends; 2187 r_waits2 = r_waits1 + nrecvs; 2188 r_waits3 = r_waits2 + nrecvs; 2189 } else { 2190 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2191 r_waits3 = s_waits3 + nsends; 2192 } 2193 2194 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2195 if (reuse == MAT_INITIAL_MATRIX){ 2196 /* get new tags to keep the communication clean */ 2197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2198 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2199 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2200 rbuf_nz = sbuf_nz + nsends; 2201 2202 /* post receives of other's nzlocal */ 2203 for (i=0; i<nrecvs; i++){ 2204 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* send nzlocal to others */ 2207 for (i=0; i<nsends; i++){ 2208 sbuf_nz[i] = nzlocal; 2209 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2210 } 2211 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2212 count = nrecvs; 2213 while (count) { 2214 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2215 recv_rank[imdex] = recv_status.MPI_SOURCE; 2216 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2217 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2218 2219 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2220 rbuf_nz[imdex] += i + 2; 2221 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2222 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2223 count--; 2224 } 2225 /* wait on sends of nzlocal */ 2226 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2227 /* send mat->i,j to others, and recv from other's */ 2228 /*------------------------------------------------*/ 2229 for (i=0; i<nsends; i++){ 2230 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2231 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2232 } 2233 /* wait on receives of mat->i,j */ 2234 /*------------------------------*/ 2235 count = nrecvs; 2236 while (count) { 2237 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2238 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2239 count--; 2240 } 2241 /* wait on sends of mat->i,j */ 2242 /*---------------------------*/ 2243 if (nsends) { 2244 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2245 } 2246 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2247 2248 /* post receives, send and receive mat->a */ 2249 /*----------------------------------------*/ 
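  /* Note on counts: rbuf_nz[] was padded above (by the sender's row count + 2) so that the
     index message rbuf_j[] fits; the value messages below carry only the sender's nzlocal
     entries, and MPI permits a received message to be shorter than the count posted in
     MPI_Irecv(). */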
2250 for (imdex=0; imdex<nrecvs; imdex++) { 2251 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2252 } 2253 for (i=0; i<nsends; i++){ 2254 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2255 } 2256 count = nrecvs; 2257 while (count) { 2258 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2259 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2260 count--; 2261 } 2262 if (nsends) { 2263 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2264 } 2265 2266 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2267 2268 /* create redundant matrix */ 2269 /*-------------------------*/ 2270 if (reuse == MAT_INITIAL_MATRIX){ 2271 /* compute rownz_max for preallocation */ 2272 for (imdex=0; imdex<nrecvs; imdex++){ 2273 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2274 rptr = rbuf_j[imdex]; 2275 for (i=0; i<j; i++){ 2276 ncols = rptr[i+1] - rptr[i]; 2277 if (rownz_max < ncols) rownz_max = ncols; 2278 } 2279 } 2280 2281 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2282 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2283 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2284 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2285 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2286 } else { 2287 C = *matredundant; 2288 } 2289 2290 /* insert local matrix entries */ 2291 rptr = sbuf_j; 2292 cols = sbuf_j + rend-rstart + 1; 2293 vals = sbuf_a; 2294 for (i=0; i<rend-rstart; i++){ 2295 row = i + rstart; 2296 ncols = rptr[i+1] - rptr[i]; 2297 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2298 vals += ncols; 2299 cols += ncols; 2300 } 2301 /* insert received matrix entries */ 2302 for (imdex=0; imdex<nrecvs; imdex++){ 2303 rstart = rowrange[recv_rank[imdex]]; 2304 rend = rowrange[recv_rank[imdex]+1]; 2305 rptr = rbuf_j[imdex]; 2306 cols = rbuf_j[imdex] + rend-rstart + 1; 2307 vals = rbuf_a[imdex]; 2308 for (i=0; i<rend-rstart; i++){ 2309 row = i + rstart; 2310 ncols = rptr[i+1] - rptr[i]; 2311 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2312 vals += ncols; 2313 cols += ncols; 2314 } 2315 } 2316 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2318 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2319 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2320 if (reuse == MAT_INITIAL_MATRIX){ 2321 PetscContainer container; 2322 *matredundant = C; 2323 /* create a supporting struct and attach it to C for reuse */ 2324 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2325 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2326 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2327 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2328 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2329 2330 redund->nzlocal = nzlocal; 2331 redund->nsends = nsends; 2332 redund->nrecvs = nrecvs; 2333 redund->send_rank = send_rank; 2334 redund->sbuf_nz = sbuf_nz; 2335 redund->sbuf_j = sbuf_j; 2336 redund->sbuf_a = sbuf_a; 2337 redund->rbuf_j = 
rbuf_j; 2338 redund->rbuf_a = rbuf_a; 2339 2340 redund->MatDestroy = C->ops->destroy; 2341 C->ops->destroy = MatDestroy_MatRedundant; 2342 } 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2349 { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 PetscErrorCode ierr; 2352 PetscInt i,*idxb = 0; 2353 PetscScalar *va,*vb; 2354 Vec vtmp; 2355 2356 PetscFunctionBegin; 2357 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2359 if (idx) { 2360 for (i=0; i<A->rmap->n; i++) { 2361 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2362 } 2363 } 2364 2365 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2366 if (idx) { 2367 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2368 } 2369 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2370 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2371 2372 for (i=0; i<A->rmap->n; i++){ 2373 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2374 va[i] = vb[i]; 2375 if (idx) idx[i] = a->garray[idxb[i]]; 2376 } 2377 } 2378 2379 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2381 if (idxb) { 2382 ierr = PetscFree(idxb);CHKERRQ(ierr); 2383 } 2384 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2391 { 2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2393 PetscErrorCode ierr; 2394 PetscInt i,*idxb = 0; 2395 PetscScalar *va,*vb; 2396 Vec vtmp; 2397 2398 PetscFunctionBegin; 2399 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2401 if (idx) { 2402 for (i=0; i<A->cmap->n; i++) { 2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2404 } 2405 } 2406 2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2408 if (idx) { 2409 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2410 } 2411 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2413 2414 for (i=0; i<A->rmap->n; i++){ 2415 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2416 va[i] = vb[i]; 2417 if (idx) idx[i] = a->garray[idxb[i]]; 2418 } 2419 } 2420 2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2423 if (idxb) { 2424 ierr = PetscFree(idxb);CHKERRQ(ierr); 2425 } 2426 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2433 { 2434 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2435 PetscInt n = A->rmap->n; 2436 PetscInt cstart = A->cmap->rstart; 2437 PetscInt *cmap = mat->garray; 2438 PetscInt *diagIdx, *offdiagIdx; 2439 Vec diagV, offdiagV; 2440 PetscScalar *a, *diagA, *offdiagA; 2441 PetscInt r; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2447 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2448 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2449 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2450 
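  /* Merge the two per-block results: for each local row keep the candidate whose magnitude
     is smaller (the comparison below uses PetscAbsScalar) and translate its column index back
     to global numbering, cstart + local column for the diagonal block or a garray[] lookup
     for the off-diagonal block. */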
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 for(r = 0; r < n; ++r) { 2454 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2455 a[r] = diagA[r]; 2456 idx[r] = cstart + diagIdx[r]; 2457 } else { 2458 a[r] = offdiagA[r]; 2459 idx[r] = cmap[offdiagIdx[r]]; 2460 } 2461 } 2462 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2465 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2466 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2467 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2474 { 2475 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2476 PetscInt n = A->rmap->n; 2477 PetscInt cstart = A->cmap->rstart; 2478 PetscInt *cmap = mat->garray; 2479 PetscInt *diagIdx, *offdiagIdx; 2480 Vec diagV, offdiagV; 2481 PetscScalar *a, *diagA, *offdiagA; 2482 PetscInt r; 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2488 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2489 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2490 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2492 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2493 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2494 for(r = 0; r < n; ++r) { 2495 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2496 a[r] = diagA[r]; 2497 idx[r] = cstart + diagIdx[r]; 2498 } else { 2499 a[r] = offdiagA[r]; 2500 idx[r] = cmap[offdiagIdx[r]]; 2501 } 2502 } 2503 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2507 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2508 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /* -------------------------------------------------------------------*/ 2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2525 MatGetRow_MPIAIJ, 2526 MatRestoreRow_MPIAIJ, 2527 MatMult_MPIAIJ, 2528 /* 4*/ MatMultAdd_MPIAIJ, 2529 MatMultTranspose_MPIAIJ, 2530 MatMultTransposeAdd_MPIAIJ, 2531 #ifdef PETSC_HAVE_PBGL 2532 MatSolve_MPIAIJ, 2533 #else 2534 0, 2535 #endif 2536 0, 2537 0, 2538 /*10*/ 0, 2539 0, 2540 0, 2541 MatRelax_MPIAIJ, 2542 MatTranspose_MPIAIJ, 2543 /*15*/ MatGetInfo_MPIAIJ, 2544 MatEqual_MPIAIJ, 2545 MatGetDiagonal_MPIAIJ, 2546 MatDiagonalScale_MPIAIJ, 2547 MatNorm_MPIAIJ, 2548 /*20*/ MatAssemblyBegin_MPIAIJ, 2549 MatAssemblyEnd_MPIAIJ, 2550 0, 2551 MatSetOption_MPIAIJ, 2552 MatZeroEntries_MPIAIJ, 2553 /*25*/ MatZeroRows_MPIAIJ, 2554 0, 2555 #ifdef PETSC_HAVE_PBGL 
2556 0, 2557 #else 2558 0, 2559 #endif 2560 0, 2561 0, 2562 /*30*/ MatSetUpPreallocation_MPIAIJ, 2563 #ifdef PETSC_HAVE_PBGL 2564 0, 2565 #else 2566 0, 2567 #endif 2568 0, 2569 0, 2570 0, 2571 /*35*/ MatDuplicate_MPIAIJ, 2572 0, 2573 0, 2574 0, 2575 0, 2576 /*40*/ MatAXPY_MPIAIJ, 2577 MatGetSubMatrices_MPIAIJ, 2578 MatIncreaseOverlap_MPIAIJ, 2579 MatGetValues_MPIAIJ, 2580 MatCopy_MPIAIJ, 2581 /*45*/ MatGetRowMax_MPIAIJ, 2582 MatScale_MPIAIJ, 2583 0, 2584 0, 2585 0, 2586 /*50*/ MatSetBlockSize_MPIAIJ, 2587 0, 2588 0, 2589 0, 2590 0, 2591 /*55*/ MatFDColoringCreate_MPIAIJ, 2592 0, 2593 MatSetUnfactored_MPIAIJ, 2594 MatPermute_MPIAIJ, 2595 0, 2596 /*60*/ MatGetSubMatrix_MPIAIJ, 2597 MatDestroy_MPIAIJ, 2598 MatView_MPIAIJ, 2599 0, 2600 0, 2601 /*65*/ 0, 2602 0, 2603 0, 2604 0, 2605 0, 2606 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2607 MatGetRowMinAbs_MPIAIJ, 2608 0, 2609 MatSetColoring_MPIAIJ, 2610 #if defined(PETSC_HAVE_ADIC) 2611 MatSetValuesAdic_MPIAIJ, 2612 #else 2613 0, 2614 #endif 2615 MatSetValuesAdifor_MPIAIJ, 2616 /*75*/ 0, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*80*/ 0, 2622 0, 2623 0, 2624 /*84*/ MatLoad_MPIAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2631 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2632 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2633 MatPtAP_Basic, 2634 MatPtAPSymbolic_MPIAIJ, 2635 /*95*/ MatPtAPNumeric_MPIAIJ, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*100*/0, 2641 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2642 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2643 MatConjugate_MPIAIJ, 2644 0, 2645 /*105*/MatSetValuesRow_MPIAIJ, 2646 MatRealPart_MPIAIJ, 2647 MatImaginaryPart_MPIAIJ, 2648 0, 2649 0, 2650 /*110*/0, 2651 MatGetRedundantMatrix_MPIAIJ, 2652 MatGetRowMin_MPIAIJ, 2653 0, 2654 0, 2655 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2656 2657 /* ----------------------------------------------------------------------------------------*/ 2658 2659 EXTERN_C_BEGIN 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2662 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2663 { 2664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2665 PetscErrorCode ierr; 2666 2667 PetscFunctionBegin; 2668 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2669 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 EXTERN_C_END 2673 2674 EXTERN_C_BEGIN 2675 #undef __FUNCT__ 2676 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2677 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2678 { 2679 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2680 PetscErrorCode ierr; 2681 2682 PetscFunctionBegin; 2683 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2684 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2685 PetscFunctionReturn(0); 2686 } 2687 EXTERN_C_END 2688 2689 #include "petscpc.h" 2690 EXTERN_C_BEGIN 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2693 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2694 { 2695 Mat_MPIAIJ *b; 2696 PetscErrorCode ierr; 2697 PetscInt i; 2698 2699 PetscFunctionBegin; 2700 B->preallocated = PETSC_TRUE; 2701 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2702 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2703 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2704 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2705 2706 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2707 
ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2708 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2709 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2710 if (d_nnz) { 2711 for (i=0; i<B->rmap->n; i++) { 2712 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2713 } 2714 } 2715 if (o_nnz) { 2716 for (i=0; i<B->rmap->n; i++) { 2717 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2718 } 2719 } 2720 b = (Mat_MPIAIJ*)B->data; 2721 2722 /* Explicitly create 2 MATSEQAIJ matrices. */ 2723 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2724 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2725 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2726 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2727 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2728 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2729 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2730 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2731 2732 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2733 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2734 2735 PetscFunctionReturn(0); 2736 } 2737 EXTERN_C_END 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2742 { 2743 Mat mat; 2744 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 *newmat = 0; 2749 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2750 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2751 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2753 a = (Mat_MPIAIJ*)mat->data; 2754 2755 mat->factor = matin->factor; 2756 mat->rmap->bs = matin->rmap->bs; 2757 mat->assembled = PETSC_TRUE; 2758 mat->insertmode = NOT_SET_VALUES; 2759 mat->preallocated = PETSC_TRUE; 2760 2761 a->size = oldmat->size; 2762 a->rank = oldmat->rank; 2763 a->donotstash = oldmat->donotstash; 2764 a->roworiented = oldmat->roworiented; 2765 a->rowindices = 0; 2766 a->rowvalues = 0; 2767 a->getrowactive = PETSC_FALSE; 2768 2769 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2770 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2771 2772 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2773 if (oldmat->colmap) { 2774 #if defined (PETSC_USE_CTABLE) 2775 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2776 #else 2777 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2778 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2779 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2780 #endif 2781 } else a->colmap = 0; 2782 if (oldmat->garray) { 2783 PetscInt len; 2784 len = oldmat->B->cmap->n; 2785 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2786 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2787 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2788 } else a->garray = 0; 2789 2790 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2792 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2794 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2796 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2798 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2799 *newmat = mat; 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #include "petscsys.h" 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatLoad_MPIAIJ" 2807 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2808 { 2809 Mat A; 2810 PetscScalar *vals,*svals; 2811 MPI_Comm comm = ((PetscObject)viewer)->comm; 2812 MPI_Status status; 2813 PetscErrorCode ierr; 2814 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2815 PetscInt i,nz,j,rstart,rend,mmax,maxnz; 2816 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2817 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2818 PetscInt cend,cstart,n,*rowners; 2819 int fd; 2820 2821 PetscFunctionBegin; 2822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2824 if (!rank) { 2825 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2826 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2827 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2828 } 2829 2830 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2831 M = header[1]; N = header[2]; 2832 /* determine ownership of all rows */ 2833 m = M/size + ((M % size) > rank); 2834 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2835 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2836 2837 /* First process needs enough room for process with most rows */ 2838 if (!rank) { 2839 mmax = rowners[1]; 2840 for (i=2; i<size; i++) { 2841 mmax = PetscMax(mmax,rowners[i]); 2842 } 2843 } else mmax = m; 2844 2845 rowners[0] = 0; 2846 for (i=2; i<=size; i++) { 2847 rowners[i] += rowners[i-1]; 2848 } 2849 rstart = rowners[rank]; 2850 rend = rowners[rank+1]; 2851 2852 /* distribute row lengths to all processors */ 2853 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2854 if (!rank) { 2855 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2856 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2857 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2858 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2859 for (j=0; j<m; j++) { 2860 procsnz[0] += ourlens[j]; 2861 } 2862 for (i=1; i<size; i++) { 2863 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2864 /* calculate the number of nonzeros on each processor */ 2865 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2866 procsnz[i] += rowlengths[j]; 2867 } 2868 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2869 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2870 } 2871 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2872 } else { 2873 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2874 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2875 } 2876 2877 if 
(!rank) { 2878 /* determine max buffer needed and allocate it */ 2879 maxnz = 0; 2880 for (i=0; i<size; i++) { 2881 maxnz = PetscMax(maxnz,procsnz[i]); 2882 } 2883 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2884 2885 /* read in my part of the matrix column indices */ 2886 nz = procsnz[0]; 2887 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2888 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2889 2890 /* read in every one elses and ship off */ 2891 for (i=1; i<size; i++) { 2892 nz = procsnz[i]; 2893 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2894 mpicnt = PetscMPIIntCast(nz); 2895 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2896 } 2897 ierr = PetscFree(cols);CHKERRQ(ierr); 2898 } else { 2899 /* determine buffer space needed for message */ 2900 nz = 0; 2901 for (i=0; i<m; i++) { 2902 nz += ourlens[i]; 2903 } 2904 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2905 2906 /* receive message of column indices*/ 2907 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2908 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2909 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2910 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2911 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2912 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2913 } 2914 2915 /* determine column ownership if matrix is not square */ 2916 if (N != M) { 2917 n = N/size + ((N % size) > rank); 2918 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2919 cstart = cend - n; 2920 } else { 2921 cstart = rstart; 2922 cend = rend; 2923 n = cend - cstart; 2924 } 2925 2926 /* loop over local rows, determining number of off diagonal entries */ 2927 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2928 jj = 0; 2929 for (i=0; i<m; i++) { 2930 for (j=0; j<ourlens[i]; j++) { 2931 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2932 jj++; 2933 } 2934 } 2935 2936 /* create our matrix */ 2937 for (i=0; i<m; i++) { 2938 ourlens[i] -= offlens[i]; 2939 } 2940 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2941 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2942 ierr = MatSetType(A,type);CHKERRQ(ierr); 2943 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2944 2945 for (i=0; i<m; i++) { 2946 ourlens[i] += offlens[i]; 2947 } 2948 2949 if (!rank) { 2950 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2951 2952 /* read in my part of the matrix numerical values */ 2953 nz = procsnz[0]; 2954 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2955 2956 /* insert into matrix */ 2957 jj = rstart; 2958 smycols = mycols; 2959 svals = vals; 2960 for (i=0; i<m; i++) { 2961 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2962 smycols += ourlens[i]; 2963 svals += ourlens[i]; 2964 jj++; 2965 } 2966 2967 /* read in other processors and ship out */ 2968 for (i=1; i<size; i++) { 2969 nz = procsnz[i]; 2970 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2971 mpicnt = PetscMPIIntCast(nz); 2972 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2973 } 2974 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2975 } else { 
2976 /* receive numeric values */ 2977 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2978 2979 /* receive message of values*/ 2980 mpicnt = PetscMPIIntCast(nz); 2981 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2982 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 2983 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2984 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2985 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2986 2987 /* insert into matrix */ 2988 jj = rstart; 2989 smycols = mycols; 2990 svals = vals; 2991 for (i=0; i<m; i++) { 2992 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2993 smycols += ourlens[i]; 2994 svals += ourlens[i]; 2995 jj++; 2996 } 2997 } 2998 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2999 ierr = PetscFree(vals);CHKERRQ(ierr); 3000 ierr = PetscFree(mycols);CHKERRQ(ierr); 3001 ierr = PetscFree(rowners);CHKERRQ(ierr); 3002 3003 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 *newmat = A; 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3011 /* 3012 Not great since it makes two copies of the submatrix, first an SeqAIJ 3013 in local and then by concatenating the local matrices the end result. 3014 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3015 */ 3016 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3017 { 3018 PetscErrorCode ierr; 3019 PetscMPIInt rank,size; 3020 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3021 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3022 Mat *local,M,Mreuse; 3023 MatScalar *vwork,*aa; 3024 MPI_Comm comm = ((PetscObject)mat)->comm; 3025 Mat_SeqAIJ *aij; 3026 3027 3028 PetscFunctionBegin; 3029 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3030 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3031 3032 if (call == MAT_REUSE_MATRIX) { 3033 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3034 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3035 local = &Mreuse; 3036 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3037 } else { 3038 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3039 Mreuse = *local; 3040 ierr = PetscFree(local);CHKERRQ(ierr); 3041 } 3042 3043 /* 3044 m - number of local rows 3045 n - number of columns (same on all processors) 3046 rstart - first row in new global matrix generated 3047 */ 3048 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3049 if (call == MAT_INITIAL_MATRIX) { 3050 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3051 ii = aij->i; 3052 jj = aij->j; 3053 3054 /* 3055 Determine the number of non-zeros in the diagonal and off-diagonal 3056 portions of the matrix in order to do correct preallocation 3057 */ 3058 3059 /* first get start and end of "diagonal" columns */ 3060 if (csize == PETSC_DECIDE) { 3061 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3062 if (mglobal == n) { /* square matrix */ 3063 nlocal = m; 3064 } else { 3065 
nlocal = n/size + ((n % size) > rank); 3066 } 3067 } else { 3068 nlocal = csize; 3069 } 3070 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3071 rstart = rend - nlocal; 3072 if (rank == size - 1 && rend != n) { 3073 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3074 } 3075 3076 /* next, compute all the lengths */ 3077 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3078 olens = dlens + m; 3079 for (i=0; i<m; i++) { 3080 jend = ii[i+1] - ii[i]; 3081 olen = 0; 3082 dlen = 0; 3083 for (j=0; j<jend; j++) { 3084 if (*jj < rstart || *jj >= rend) olen++; 3085 else dlen++; 3086 jj++; 3087 } 3088 olens[i] = olen; 3089 dlens[i] = dlen; 3090 } 3091 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3092 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3093 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3094 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3095 ierr = PetscFree(dlens);CHKERRQ(ierr); 3096 } else { 3097 PetscInt ml,nl; 3098 3099 M = *newmat; 3100 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3101 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3102 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3103 /* 3104 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3105 rather than the slower MatSetValues(). 3106 */ 3107 M->was_assembled = PETSC_TRUE; 3108 M->assembled = PETSC_FALSE; 3109 } 3110 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3111 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3112 ii = aij->i; 3113 jj = aij->j; 3114 aa = aij->a; 3115 for (i=0; i<m; i++) { 3116 row = rstart + i; 3117 nz = ii[i+1] - ii[i]; 3118 cwork = jj; jj += nz; 3119 vwork = aa; aa += nz; 3120 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3121 } 3122 3123 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3124 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 *newmat = M; 3126 3127 /* save submatrix used in processor for next request */ 3128 if (call == MAT_INITIAL_MATRIX) { 3129 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3130 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3131 } 3132 3133 PetscFunctionReturn(0); 3134 } 3135 3136 EXTERN_C_BEGIN 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3139 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3140 { 3141 PetscInt m,cstart, cend,j,nnz,i,d; 3142 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3143 const PetscInt *JJ; 3144 PetscScalar *values; 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3149 3150 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3151 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3152 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3153 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3154 m = B->rmap->n; 3155 cstart = B->cmap->rstart; 3156 cend = B->cmap->rend; 3157 rstart = B->rmap->rstart; 3158 3159 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3160 o_nnz = d_nnz + m; 3161 3162 #if defined(PETSC_USE_DEBUGGING) 3163 for (i=0; i<m; i++) { 3164 nnz = Ii[i+1]- Ii[i]; 3165 JJ = J + Ii[i]; 3166 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D 
number of columns",i,nnz);
3167     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3168     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3169     for (j=1; j<nnz; j++) {
3170       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3171     }
3172   }
3173 #endif
3174
3175   for (i=0; i<m; i++) {
3176     nnz     = Ii[i+1]- Ii[i];
3177     JJ      = J + Ii[i];
3178     nnz_max = PetscMax(nnz_max,nnz);
3179     for (j=0; j<nnz; j++) {
3180       if (*JJ >= cstart) break;
3181       JJ++;
3182     }
3183     d = 0;
3184     for (; j<nnz; j++) {
3185       if (*JJ++ >= cend) break;
3186       d++;
3187     }
3188     d_nnz[i] = d;
3189     o_nnz[i] = nnz - d;
3190   }
3191   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3192   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3193
3194   if (v) values = (PetscScalar*)v;
3195   else {
3196     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3197     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3198   }
3199
3200   for (i=0; i<m; i++) {
3201     ii   = i + rstart;
3202     nnz  = Ii[i+1]- Ii[i];
3203     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3204   }
3205   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3206   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3207
3208   if (!v) {
3209     ierr = PetscFree(values);CHKERRQ(ierr);
3210   }
3211   PetscFunctionReturn(0);
3212 }
3213 EXTERN_C_END
3214
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3217 /*@
3218    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3219    (the default parallel PETSc format).
3220
3221    Collective on MPI_Comm
3222
3223    Input Parameters:
3224 +  B - the matrix
3225 .  i - the indices into j for the start of each local row (starts with zero)
3226 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
3227 -  v - optional values in the matrix
3228
3229    Level: developer
3230
3231    Notes:
3232        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3233      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3234      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3235
3236        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3237
3238        The format used for the sparse matrix input is equivalent to a
3239     row-major ordering, i.e. for the following matrix, the input data expected is
3240     as shown:
3241
3242         1 0 0
3243         2 0 3     P0
3244        -------
3245         4 5 6     P1
3246
3247      Process0 [P0]: rows_owned=[0,1]
3248       i =  {0,1,3}  [size = nrow+1 = 2+1]
3249       j =  {0,0,2}  [size = nz = 3]
3250       v =  {1,2,3}  [size = nz = 3]
3251
3252      Process1 [P1]: rows_owned=[2]
3253       i =  {0,3}    [size = nrow+1 = 1+1]
3254       j =  {0,1,2}  [size = nz = 3]
3255       v =  {4,5,6}  [size = nz = 3]
3256
3257    The column indices for each row MUST be sorted.
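   A minimal usage sketch for process 0 of the example above (the array names ia, ja, and va
   are illustrative, not part of this routine's interface); the type must be set to MATMPIAIJ
   before this routine is called:
.vb
   PetscInt    ia[] = {0,1,3};        /* row offsets for the 2 locally owned rows       */
   PetscInt    ja[] = {0,0,2};        /* global column indices, sorted within each row  */
   PetscScalar va[] = {1.0,2.0,3.0};  /* values, stored in the same order as ja         */

   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocationCSR(B,ia,ja,va);CHKERRQ(ierr);
.ve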
3258
3259 .keywords: matrix, aij, compressed row, sparse, parallel
3260
3261 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3262           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3263 @*/
3264 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3265 {
3266   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3267
3268   PetscFunctionBegin;
3269   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3270   if (f) {
3271     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3272   }
3273   PetscFunctionReturn(0);
3274 }
3275
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3278 /*@C
3279    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3280    (the default parallel PETSc format).  For good matrix assembly performance
3281    the user should preallocate the matrix storage by setting the parameters
3282    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3283    performance can be increased by more than a factor of 50.
3284
3285    Collective on MPI_Comm
3286
3287    Input Parameters:
3288 +  B - the matrix
3289 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3290            (same value is used for all local rows)
3291 .  d_nnz - array containing the number of nonzeros in the various rows of the
3292            DIAGONAL portion of the local submatrix (possibly different for each row)
3293            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3294            The size of this array is equal to the number of local rows, i.e 'm'.
3295            You must leave room for the diagonal entry even if it is zero.
3296 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3297            submatrix (same value is used for all local rows).
3298 -  o_nnz - array containing the number of nonzeros in the various rows of the
3299            OFF-DIAGONAL portion of the local submatrix (possibly different for
3300            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3301            structure. The size of this array is equal to the number
3302            of local rows, i.e 'm'.
3303
3304    If the *_nnz parameter is given then the *_nz parameter is ignored.
3305
3306    The AIJ format (also called the Yale sparse matrix format or
3307    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3308    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3309
3310    The parallel matrix is partitioned such that the first m0 rows belong to
3311    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3312    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3313
3314    The DIAGONAL portion of the local submatrix of a processor can be defined
3315    as the submatrix which is obtained by extracting the part corresponding
3316    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3317    first row that belongs to the processor, and r2 is the last row belonging
3318    to this processor. This is a square mxm matrix. The remaining portion
3319    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3320
3321    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3322
3323    You can call MatGetInfo() to get information on how effective the preallocation was;
3324    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3325    You can also run with the option -info and look for messages with the string
3326    malloc in them to see if additional memory allocation was needed.
3327
3328    Example usage:
3329
3330    Consider the following 8x8 matrix with 34 non-zero values, that is
3331    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
3332    proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
3333    as follows:
3334
3335 .vb
3336             1  2  0  |  0  3  0  |  0  4
3337     Proc0   0  5  6  |  7  0  0  |  8  0
3338             9  0 10  | 11  0  0  | 12  0
3339     -------------------------------------
3340            13  0 14  | 15 16 17  |  0  0
3341     Proc1   0 18  0  | 19 20 21  |  0  0
3342             0  0  0  | 22 23  0  | 24  0
3343     -------------------------------------
3344     Proc2  25 26 27  |  0  0 28  | 29  0
3345            30  0  0  | 31 32 33  |  0 34
3346 .ve
3347
3348    This can be represented as a collection of submatrices as:
3349
3350 .vb
3351       A B C
3352       D E F
3353       G H I
3354 .ve
3355
3356    Where the submatrices A,B,C are owned by proc0, D,E,F are
3357    owned by proc1, G,H,I are owned by proc2.
3358
3359    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3360    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3361    The 'M','N' parameters are 8,8, and have the same values on all procs.
3362
3363    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3364    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3365    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3366    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3367    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3368    matrix, and [DF] as another SeqAIJ matrix.
3369
3370    When d_nz, o_nz parameters are specified, d_nz storage elements are
3371    allocated for every row of the local diagonal submatrix, and o_nz
3372    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3373    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3374    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3375    In this case, the values of d_nz,o_nz are:
3376 .vb
3377      proc0 : d_nz = 2, o_nz = 2
3378      proc1 : d_nz = 3, o_nz = 2
3379      proc2 : d_nz = 1, o_nz = 4
3380 .ve
3381    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3382    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3383    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3384    34 values.
3385
3386    When d_nnz, o_nnz parameters are specified, the storage is specified
3387    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3388    In the above case the values for d_nnz,o_nnz are:
3389 .vb
3390      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3391      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3392      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3393 .ve
3394    Here the space allocated is the sum of all the above values, i.e. 34, and
3395    hence pre-allocation is perfect.
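   As a hedged sketch (the array names are illustrative), proc0 of the example above could
   perform the exact-fit preallocation like this:
.vb
   PetscInt d_nnz[] = {2,2,2};   /* nonzeros per local row in the DIAGONAL block [A]      */
   PetscInt o_nnz[] = {2,2,2};   /* nonzeros per local row in the OFF-DIAGONAL block [BC] */
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve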
3396 3397 Level: intermediate 3398 3399 .keywords: matrix, aij, compressed row, sparse, parallel 3400 3401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3402 MPIAIJ, MatGetInfo() 3403 @*/ 3404 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3405 { 3406 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3407 3408 PetscFunctionBegin; 3409 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3410 if (f) { 3411 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3412 } 3413 PetscFunctionReturn(0); 3414 } 3415 3416 #undef __FUNCT__ 3417 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3418 /*@ 3419 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3420 CSR format the local rows. 3421 3422 Collective on MPI_Comm 3423 3424 Input Parameters: 3425 + comm - MPI communicator 3426 . m - number of local rows (Cannot be PETSC_DECIDE) 3427 . n - This value should be the same as the local size used in creating the 3428 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3429 calculated if N is given) For square matrices n is almost always m. 3430 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3431 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3432 . i - row indices 3433 . j - column indices 3434 - a - matrix values 3435 3436 Output Parameter: 3437 . mat - the matrix 3438 3439 Level: intermediate 3440 3441 Notes: 3442 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3443 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3444 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3445 3446 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3447 3448 The format which is used for the sparse matrix input, is equivalent to a 3449 row-major ordering.. 
i.e for the following matrix, the input data expected is 3450 as shown: 3451 3452 1 0 0 3453 2 0 3 P0 3454 ------- 3455 4 5 6 P1 3456 3457 Process0 [P0]: rows_owned=[0,1] 3458 i = {0,1,3} [size = nrow+1 = 2+1] 3459 j = {0,0,2} [size = nz = 6] 3460 v = {1,2,3} [size = nz = 6] 3461 3462 Process1 [P1]: rows_owned=[2] 3463 i = {0,3} [size = nrow+1 = 1+1] 3464 j = {0,1,2} [size = nz = 6] 3465 v = {4,5,6} [size = nz = 6] 3466 3467 .keywords: matrix, aij, compressed row, sparse, parallel 3468 3469 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3470 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3471 @*/ 3472 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3473 { 3474 PetscErrorCode ierr; 3475 3476 PetscFunctionBegin; 3477 if (i[0]) { 3478 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3479 } 3480 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3481 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3482 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3483 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3484 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3485 PetscFunctionReturn(0); 3486 } 3487 3488 #undef __FUNCT__ 3489 #define __FUNCT__ "MatCreateMPIAIJ" 3490 /*@C 3491 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3492 (the default parallel PETSc format). For good matrix assembly performance 3493 the user should preallocate the matrix storage by setting the parameters 3494 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3495 performance can be increased by more than a factor of 50. 3496 3497 Collective on MPI_Comm 3498 3499 Input Parameters: 3500 + comm - MPI communicator 3501 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3502 This value should be the same as the local size used in creating the 3503 y vector for the matrix-vector product y = Ax. 3504 . n - This value should be the same as the local size used in creating the 3505 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3506 calculated if N is given) For square matrices n is almost always m. 3507 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3508 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3509 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3510 (same value is used for all local rows) 3511 . d_nnz - array containing the number of nonzeros in the various rows of the 3512 DIAGONAL portion of the local submatrix (possibly different for each row) 3513 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3514 The size of this array is equal to the number of local rows, i.e 'm'. 3515 You must leave room for the diagonal entry even if it is zero. 3516 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3517 submatrix (same value is used for all local rows). 3518 - o_nnz - array containing the number of nonzeros in the various rows of the 3519 OFF-DIAGONAL portion of the local submatrix (possibly different for 3520 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3521 structure. The size of this array is equal to the number 3522 of local rows, i.e 'm'. 
3523
3524    Output Parameter:
3525 .  A - the matrix
3526
3527    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3528    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3529    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3530
3531    Notes:
3532    If the *_nnz parameter is given then the *_nz parameter is ignored.
3533
3534    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
3535    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3536    storage requirements for this matrix.
3537
3538    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3539    processor then it must be used on all processors that share the object for
3540    that argument.
3541
3542    The user MUST specify either the local or global matrix dimensions
3543    (possibly both).
3544
3545    The parallel matrix is partitioned across processors such that the
3546    first m0 rows belong to process 0, the next m1 rows belong to
3547    process 1, the next m2 rows belong to process 2, etc., where
3548    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3549    values corresponding to an [m x N] submatrix.
3550
3551    The columns are logically partitioned with the n0 columns belonging
3552    to the 0th partition, the next n1 columns belonging to the next
3553    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3554
3555    The DIAGONAL portion of the local submatrix on any given processor
3556    is the submatrix corresponding to the rows and columns m,n
3557    corresponding to the given processor, i.e. the diagonal matrix on
3558    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3559    etc. The remaining portion of the local submatrix [m x (N-n)]
3560    constitutes the OFF-DIAGONAL portion. The example below better
3561    illustrates this concept.
3562
3563    For a square global matrix we define each processor's diagonal portion
3564    to be its local rows and the corresponding columns (a square submatrix);
3565    each processor's off-diagonal portion encompasses the remainder of the
3566    local matrix (a rectangular submatrix).
3567
3568    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3569
3570    When calling this routine with a single process communicator, a matrix of
3571    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3572    type of communicator, use the construction mechanism:
3573    MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3574
3575    By default, this format uses inodes (identical nodes) when possible.
3576    We search for consecutive rows with the same nonzero structure, thereby
3577    reusing matrix information to achieve increased efficiency.
3578
3579    Options Database Keys:
3580 +  -mat_no_inode  - Do not use inodes
3581 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3582 -  -mat_aij_oneindex - Internally use indexing starting at 1
3583        rather than 0.  Note that when calling MatSetValues(),
3584        the user still MUST index entries starting at 0!
3585
3586
3587    Example usage:
3588
3589    Consider the following 8x8 matrix with 34 non-zero values, that is
3590    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
3591    proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
3592    as follows:
3593
3594 .vb
3595             1  2  0  |  0  3  0  |  0  4
3596     Proc0   0  5  6  |  7  0  0  |  8  0
3597             9  0 10  | 11  0  0  | 12  0
3598     -------------------------------------
3599            13  0 14  | 15 16 17  |  0  0
3600     Proc1   0 18  0  | 19 20 21  |  0  0
3601             0  0  0  | 22 23  0  | 24  0
3602     -------------------------------------
3603     Proc2  25 26 27  |  0  0 28  | 29  0
3604            30  0  0  | 31 32 33  |  0 34
3605 .ve
3606
3607    This can be represented as a collection of submatrices as:
3608
3609 .vb
3610       A B C
3611       D E F
3612       G H I
3613 .ve
3614
3615    Where the submatrices A,B,C are owned by proc0, D,E,F are
3616    owned by proc1, G,H,I are owned by proc2.
3617
3618    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3619    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3620    The 'M','N' parameters are 8,8, and have the same values on all procs.
3621
3622    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3623    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3624    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3625    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3626    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3627    matrix, and [DF] as another SeqAIJ matrix.
3628
3629    When d_nz, o_nz parameters are specified, d_nz storage elements are
3630    allocated for every row of the local diagonal submatrix, and o_nz
3631    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3632    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3633    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3634    In this case, the values of d_nz,o_nz are:
3635 .vb
3636      proc0 : d_nz = 2, o_nz = 2
3637      proc1 : d_nz = 3, o_nz = 2
3638      proc2 : d_nz = 1, o_nz = 4
3639 .ve
3640    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3641    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3642    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3643    34 values.
3644
3645    When d_nnz, o_nnz parameters are specified, the storage is specified
3646    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3647    In the above case the values for d_nnz,o_nnz are:
3648 .vb
3649      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3650      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3651      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3652 .ve
3653    Here the space allocated is the sum of all the above values, i.e. 34, and
3654    hence pre-allocation is perfect.
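   A hedged sketch of the corresponding call on proc0 of the example above (exact-fit
   preallocation; the array names are illustrative):
.vb
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve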
3655 3656 Level: intermediate 3657 3658 .keywords: matrix, aij, compressed row, sparse, parallel 3659 3660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3661 MPIAIJ, MatCreateMPIAIJWithArrays() 3662 @*/ 3663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3664 { 3665 PetscErrorCode ierr; 3666 PetscMPIInt size; 3667 3668 PetscFunctionBegin; 3669 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3670 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3671 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3672 if (size > 1) { 3673 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3674 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3675 } else { 3676 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3677 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3678 } 3679 PetscFunctionReturn(0); 3680 } 3681 3682 #undef __FUNCT__ 3683 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3685 { 3686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3687 3688 PetscFunctionBegin; 3689 *Ad = a->A; 3690 *Ao = a->B; 3691 *colmap = a->garray; 3692 PetscFunctionReturn(0); 3693 } 3694 3695 #undef __FUNCT__ 3696 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3697 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3698 { 3699 PetscErrorCode ierr; 3700 PetscInt i; 3701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3702 3703 PetscFunctionBegin; 3704 if (coloring->ctype == IS_COLORING_GLOBAL) { 3705 ISColoringValue *allcolors,*colors; 3706 ISColoring ocoloring; 3707 3708 /* set coloring for diagonal portion */ 3709 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3710 3711 /* set coloring for off-diagonal portion */ 3712 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3713 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3714 for (i=0; i<a->B->cmap->n; i++) { 3715 colors[i] = allcolors[a->garray[i]]; 3716 } 3717 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3718 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3719 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3720 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3721 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3722 ISColoringValue *colors; 3723 PetscInt *larray; 3724 ISColoring ocoloring; 3725 3726 /* set coloring for diagonal portion */ 3727 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3728 for (i=0; i<a->A->cmap->n; i++) { 3729 larray[i] = i + A->cmap->rstart; 3730 } 3731 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3732 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3733 for (i=0; i<a->A->cmap->n; i++) { 3734 colors[i] = coloring->colors[larray[i]]; 3735 } 3736 ierr = PetscFree(larray);CHKERRQ(ierr); 3737 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3738 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3739 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3740 3741 /* set coloring for off-diagonal portion */ 3742 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3743 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3744 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3745 for (i=0; i<a->B->cmap->n; i++) { 3746 colors[i] = coloring->colors[larray[i]]; 3747 } 3748 ierr = PetscFree(larray);CHKERRQ(ierr); 3749 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3750 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3751 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3752 } else { 3753 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3754 } 3755 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #if defined(PETSC_HAVE_ADIC) 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3762 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3763 { 3764 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3765 PetscErrorCode ierr; 3766 3767 PetscFunctionBegin; 3768 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3769 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3770 PetscFunctionReturn(0); 3771 } 3772 #endif 3773 3774 #undef __FUNCT__ 3775 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3776 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3777 { 3778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3779 PetscErrorCode ierr; 3780 3781 PetscFunctionBegin; 3782 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3783 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3784 PetscFunctionReturn(0); 3785 } 3786 3787 #undef __FUNCT__ 3788 #define __FUNCT__ "MatMerge" 3789 /*@ 3790 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3791 matrices from each processor 3792 3793 Collective on MPI_Comm 3794 3795 Input Parameters: 3796 + comm - the communicators the parallel matrix will live on 3797 . inmat - the input sequential matrices 3798 . n - number of local columns (or PETSC_DECIDE) 3799 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3800 3801 Output Parameter: 3802 . outmat - the parallel matrix generated 3803 3804 Level: advanced 3805 3806 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
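   A minimal usage sketch (A_seq is an illustrative name for an assembled SeqAIJ matrix held
   on each process; note that the implementation destroys the input matrix):
.vb
   Mat A_mpi;
   ierr = MatMerge(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);CHKERRQ(ierr);
.ve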
3807 3808 @*/ 3809 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3810 { 3811 PetscErrorCode ierr; 3812 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3813 PetscInt *indx; 3814 PetscScalar *values; 3815 3816 PetscFunctionBegin; 3817 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3818 if (scall == MAT_INITIAL_MATRIX){ 3819 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3820 if (n == PETSC_DECIDE){ 3821 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3822 } 3823 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3824 rstart -= m; 3825 3826 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3827 for (i=0;i<m;i++) { 3828 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3829 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3830 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3831 } 3832 /* This routine will ONLY return MPIAIJ type matrix */ 3833 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3834 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3835 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3836 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3837 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3838 3839 } else if (scall == MAT_REUSE_MATRIX){ 3840 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3841 } else { 3842 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3843 } 3844 3845 for (i=0;i<m;i++) { 3846 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3847 Ii = i + rstart; 3848 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3849 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3850 } 3851 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3852 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3853 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3854 3855 PetscFunctionReturn(0); 3856 } 3857 3858 #undef __FUNCT__ 3859 #define __FUNCT__ "MatFileSplit" 3860 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3861 { 3862 PetscErrorCode ierr; 3863 PetscMPIInt rank; 3864 PetscInt m,N,i,rstart,nnz; 3865 size_t len; 3866 const PetscInt *indx; 3867 PetscViewer out; 3868 char *name; 3869 Mat B; 3870 const PetscScalar *values; 3871 3872 PetscFunctionBegin; 3873 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3874 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3875 /* Should this be the type of the diagonal block of A? 
*/ 3876 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3877 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3878 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3879 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3880 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3881 for (i=0;i<m;i++) { 3882 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3883 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3884 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3885 } 3886 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 3889 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3890 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3891 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3892 sprintf(name,"%s.%d",outfile,rank); 3893 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3894 ierr = PetscFree(name); 3895 ierr = MatView(B,out);CHKERRQ(ierr); 3896 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3897 ierr = MatDestroy(B);CHKERRQ(ierr); 3898 PetscFunctionReturn(0); 3899 } 3900 3901 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3902 #undef __FUNCT__ 3903 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3904 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3905 { 3906 PetscErrorCode ierr; 3907 Mat_Merge_SeqsToMPI *merge; 3908 PetscContainer container; 3909 3910 PetscFunctionBegin; 3911 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3912 if (container) { 3913 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3915 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3916 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3917 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3918 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3919 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3920 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3921 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3922 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3923 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3924 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3925 3926 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3927 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3928 } 3929 ierr = PetscFree(merge);CHKERRQ(ierr); 3930 3931 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3932 PetscFunctionReturn(0); 3933 } 3934 3935 #include "../src/mat/utils/freespace.h" 3936 #include "petscbt.h" 3937 3938 #undef __FUNCT__ 3939 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3940 /*@C 3941 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3942 matrices from each processor 3943 3944 Collective on MPI_Comm 3945 3946 Input Parameters: 3947 + comm - the communicators the parallel matrix will live on 3948 . seqmat - the input sequential matrices 3949 . m - number of local rows (or PETSC_DECIDE) 3950 . n - number of local columns (or PETSC_DECIDE) 3951 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3952 3953 Output Parameter: 3954 . mpimat - the parallel matrix generated 3955 3956 Level: advanced 3957 3958 Notes: 3959 The dimensions of the sequential matrix in each processor MUST be the same. 3960 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3961 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 3962 @*/ 3963 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3964 { 3965 PetscErrorCode ierr; 3966 MPI_Comm comm=((PetscObject)mpimat)->comm; 3967 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3968 PetscMPIInt size,rank,taga,*len_s; 3969 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3970 PetscInt proc,m; 3971 PetscInt **buf_ri,**buf_rj; 3972 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3973 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3974 MPI_Request *s_waits,*r_waits; 3975 MPI_Status *status; 3976 MatScalar *aa=a->a; 3977 MatScalar **abuf_r,*ba_i; 3978 Mat_Merge_SeqsToMPI *merge; 3979 PetscContainer container; 3980 3981 PetscFunctionBegin; 3982 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3983 3984 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3985 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3986 3987 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3988 if (container) { 3989 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3990 } 3991 bi = merge->bi; 3992 bj = merge->bj; 3993 buf_ri = merge->buf_ri; 3994 buf_rj = merge->buf_rj; 3995 3996 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3997 owners = merge->rowmap.range; 3998 len_s = merge->len_s; 3999 4000 /* send and recv matrix values */ 4001 /*-----------------------------*/ 4002 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4003 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4004 4005 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4006 for (proc=0,k=0; proc<size; proc++){ 4007 if (!len_s[proc]) continue; 4008 i = owners[proc]; 4009 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4010 k++; 4011 } 4012 4013 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4014 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4015 ierr = PetscFree(status);CHKERRQ(ierr); 4016 4017 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4018 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4019 4020 /* insert mat values of mpimat */ 4021 /*----------------------------*/ 4022 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4023 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4024 nextrow = buf_ri_k + merge->nrecv; 4025 nextai = nextrow + merge->nrecv; 4026 4027 for (k=0; k<merge->nrecv; k++){ 4028 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4029 nrows = *(buf_ri_k[k]); 4030 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4031 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4032 } 4033 4034 /* set values of ba */ 4035 m = merge->rowmap.n; 4036 for (i=0; i<m; i++) { 4037 arow = owners[rank] + i; 4038 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4039 bnzi = bi[i+1] - bi[i]; 4040 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4041 4042 /* add local non-zero vals of this proc's seqmat into ba */ 4043 anzi = ai[arow+1] - ai[arow]; 4044 aj = a->j + ai[arow]; 4045 aa = a->a + ai[arow]; 4046 nextaj = 0; 4047 for (j=0; nextaj<anzi; j++){ 4048 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4049 ba_i[j] += aa[nextaj++]; 4050 } 4051 } 4052 4053 /* add 
received vals into ba */ 4054 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4055 /* i-th row */ 4056 if (i == *nextrow[k]) { 4057 anzi = *(nextai[k]+1) - *nextai[k]; 4058 aj = buf_rj[k] + *(nextai[k]); 4059 aa = abuf_r[k] + *(nextai[k]); 4060 nextaj = 0; 4061 for (j=0; nextaj<anzi; j++){ 4062 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4063 ba_i[j] += aa[nextaj++]; 4064 } 4065 } 4066 nextrow[k]++; nextai[k]++; 4067 } 4068 } 4069 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4070 } 4071 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4072 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4073 4074 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4075 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4076 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4077 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4078 PetscFunctionReturn(0); 4079 } 4080 4081 #undef __FUNCT__ 4082 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4083 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4084 { 4085 PetscErrorCode ierr; 4086 Mat B_mpi; 4087 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4088 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4089 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4090 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4091 PetscInt len,proc,*dnz,*onz; 4092 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4093 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4094 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4095 MPI_Status *status; 4096 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4097 PetscBT lnkbt; 4098 Mat_Merge_SeqsToMPI *merge; 4099 PetscContainer container; 4100 4101 PetscFunctionBegin; 4102 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4103 4104 /* make sure it is a PETSc comm */ 4105 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4106 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4107 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4108 4109 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4110 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4111 4112 /* determine row ownership */ 4113 /*---------------------------------------------------------*/ 4114 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4115 merge->rowmap.n = m; 4116 merge->rowmap.N = M; 4117 merge->rowmap.bs = 1; 4118 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4119 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4120 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4121 4122 m = merge->rowmap.n; 4123 M = merge->rowmap.N; 4124 owners = merge->rowmap.range; 4125 4126 /* determine the number of messages to send, their lengths */ 4127 /*---------------------------------------------------------*/ 4128 len_s = merge->len_s; 4129 4130 len = 0; /* length of buf_si[] */ 4131 merge->nsend = 0; 4132 for (proc=0; proc<size; proc++){ 4133 len_si[proc] = 0; 4134 if (proc == rank){ 4135 len_s[proc] = 0; 4136 } else { 4137 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4138 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4139 } 4140 if (len_s[proc]) { 4141 merge->nsend++; 4142 nrows = 0; 4143 for (i=owners[proc]; i<owners[proc+1]; i++){ 4144 if (ai[i+1] > ai[i]) nrows++; 4145 } 4146 len_si[proc] = 2*(nrows+1); 4147 len += 
len_si[proc]; 4148 } 4149 } 4150 4151 /* determine the number and length of messages to receive for ij-structure */ 4152 /*-------------------------------------------------------------------------*/ 4153 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4154 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4155 4156 /* post the Irecv of j-structure */ 4157 /*-------------------------------*/ 4158 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4159 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4160 4161 /* post the Isend of j-structure */ 4162 /*--------------------------------*/ 4163 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4164 sj_waits = si_waits + merge->nsend; 4165 4166 for (proc=0, k=0; proc<size; proc++){ 4167 if (!len_s[proc]) continue; 4168 i = owners[proc]; 4169 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4170 k++; 4171 } 4172 4173 /* receives and sends of j-structure are complete */ 4174 /*------------------------------------------------*/ 4175 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4176 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4177 4178 /* send and recv i-structure */ 4179 /*---------------------------*/ 4180 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4181 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4182 4183 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4184 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4185 for (proc=0,k=0; proc<size; proc++){ 4186 if (!len_s[proc]) continue; 4187 /* form outgoing message for i-structure: 4188 buf_si[0]: nrows to be sent 4189 [1:nrows]: row index (global) 4190 [nrows+1:2*nrows+1]: i-structure index 4191 */ 4192 /*-------------------------------------------*/ 4193 nrows = len_si[proc]/2 - 1; 4194 buf_si_i = buf_si + nrows+1; 4195 buf_si[0] = nrows; 4196 buf_si_i[0] = 0; 4197 nrows = 0; 4198 for (i=owners[proc]; i<owners[proc+1]; i++){ 4199 anzi = ai[i+1] - ai[i]; 4200 if (anzi) { 4201 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4202 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4203 nrows++; 4204 } 4205 } 4206 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4207 k++; 4208 buf_si += len_si[proc]; 4209 } 4210 4211 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4212 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4213 4214 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4215 for (i=0; i<merge->nrecv; i++){ 4216 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4217 } 4218 4219 ierr = PetscFree(len_si);CHKERRQ(ierr); 4220 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4221 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4222 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4223 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4224 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4225 ierr = PetscFree(status);CHKERRQ(ierr); 4226 4227 /* compute a local seq matrix in each processor */ 4228 /*----------------------------------------------*/ 4229 /* allocate bi array and 
free space for accumulating nonzero column info */ 4230 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4231 bi[0] = 0; 4232 4233 /* create and initialize a linked list */ 4234 nlnk = N+1; 4235 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4236 4237 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4238 len = 0; 4239 len = ai[owners[rank+1]] - ai[owners[rank]]; 4240 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4241 current_space = free_space; 4242 4243 /* determine symbolic info for each local row */ 4244 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4245 nextrow = buf_ri_k + merge->nrecv; 4246 nextai = nextrow + merge->nrecv; 4247 for (k=0; k<merge->nrecv; k++){ 4248 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4249 nrows = *buf_ri_k[k]; 4250 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4251 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4252 } 4253 4254 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4255 len = 0; 4256 for (i=0;i<m;i++) { 4257 bnzi = 0; 4258 /* add local non-zero cols of this proc's seqmat into lnk */ 4259 arow = owners[rank] + i; 4260 anzi = ai[arow+1] - ai[arow]; 4261 aj = a->j + ai[arow]; 4262 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4263 bnzi += nlnk; 4264 /* add received col data into lnk */ 4265 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4266 if (i == *nextrow[k]) { /* i-th row */ 4267 anzi = *(nextai[k]+1) - *nextai[k]; 4268 aj = buf_rj[k] + *nextai[k]; 4269 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4270 bnzi += nlnk; 4271 nextrow[k]++; nextai[k]++; 4272 } 4273 } 4274 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4275 4276 /* if free space is not available, make more free space */ 4277 if (current_space->local_remaining<bnzi) { 4278 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4279 nspacedouble++; 4280 } 4281 /* copy data into free space, then initialize lnk */ 4282 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4283 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4284 4285 current_space->array += bnzi; 4286 current_space->local_used += bnzi; 4287 current_space->local_remaining -= bnzi; 4288 4289 bi[i+1] = bi[i] + bnzi; 4290 } 4291 4292 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4293 4294 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4295 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4296 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4297 4298 /* create symbolic parallel matrix B_mpi */ 4299 /*---------------------------------------*/ 4300 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4301 if (n==PETSC_DECIDE) { 4302 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4303 } else { 4304 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4305 } 4306 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4307 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4308 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4309 4310 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4311 B_mpi->assembled = PETSC_FALSE; 4312 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4313 merge->bi = bi; 4314 merge->bj = bj; 4315 merge->buf_ri = buf_ri; 4316 merge->buf_rj = 
buf_rj; 4317 merge->coi = PETSC_NULL; 4318 merge->coj = PETSC_NULL; 4319 merge->owners_co = PETSC_NULL; 4320 4321 /* attach the supporting struct to B_mpi for reuse */ 4322 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4323 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4324 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4325 *mpimat = B_mpi; 4326 4327 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4328 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4329 PetscFunctionReturn(0); 4330 } 4331 4332 #undef __FUNCT__ 4333 #define __FUNCT__ "MatMerge_SeqsToMPI" 4334 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4335 { 4336 PetscErrorCode ierr; 4337 4338 PetscFunctionBegin; 4339 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4340 if (scall == MAT_INITIAL_MATRIX){ 4341 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4342 } 4343 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4344 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4345 PetscFunctionReturn(0); 4346 } 4347 4348 #undef __FUNCT__ 4349 #define __FUNCT__ "MatGetLocalMat" 4350 /*@ 4351 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4352 4353 Not Collective 4354 4355 Input Parameters: 4356 + A - the matrix 4357 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4358 4359 Output Parameter: 4360 . A_loc - the local sequential matrix generated 4361 4362 Level: developer 4363 4364 @*/ 4365 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4366 { 4367 PetscErrorCode ierr; 4368 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4369 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4370 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4371 MatScalar *aa=a->a,*ba=b->a,*cam; 4372 PetscScalar *ca; 4373 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4374 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4375 4376 PetscFunctionBegin; 4377 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4378 if (scall == MAT_INITIAL_MATRIX){ 4379 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4380 ci[0] = 0; 4381 for (i=0; i<am; i++){ 4382 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4383 } 4384 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4385 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4386 k = 0; 4387 for (i=0; i<am; i++) { 4388 ncols_o = bi[i+1] - bi[i]; 4389 ncols_d = ai[i+1] - ai[i]; 4390 /* off-diagonal portion of A */ 4391 for (jo=0; jo<ncols_o; jo++) { 4392 col = cmap[*bj]; 4393 if (col >= cstart) break; 4394 cj[k] = col; bj++; 4395 ca[k++] = *ba++; 4396 } 4397 /* diagonal portion of A */ 4398 for (j=0; j<ncols_d; j++) { 4399 cj[k] = cstart + *aj++; 4400 ca[k++] = *aa++; 4401 } 4402 /* off-diagonal portion of A */ 4403 for (j=jo; j<ncols_o; j++) { 4404 cj[k] = cmap[*bj++]; 4405 ca[k++] = *ba++; 4406 } 4407 } 4408 /* put together the new matrix */ 4409 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4410 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4411 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4412 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4413 mat->free_a = PETSC_TRUE; 4414 mat->free_ij = PETSC_TRUE; 4415 mat->nonew = 0; 4416 } else if (scall == MAT_REUSE_MATRIX){ 4417 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4418 ci = mat->i; cj = mat->j; cam = mat->a; 4419 for (i=0; i<am; i++) { 4420 /* off-diagonal portion of A */ 4421 ncols_o = bi[i+1] - bi[i]; 4422 for (jo=0; jo<ncols_o; jo++) { 4423 col = cmap[*bj]; 4424 if (col >= cstart) break; 4425 *cam++ = *ba++; bj++; 4426 } 4427 /* diagonal portion of A */ 4428 ncols_d = ai[i+1] - ai[i]; 4429 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4430 /* off-diagonal portion of A */ 4431 for (j=jo; j<ncols_o; j++) { 4432 *cam++ = *ba++; bj++; 4433 } 4434 } 4435 } else { 4436 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4437 } 4438 4439 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4440 PetscFunctionReturn(0); 4441 } 4442 4443 #undef __FUNCT__ 4444 #define __FUNCT__ "MatGetLocalMatCondensed" 4445 /*@C 4446 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4447 4448 Not Collective 4449 4450 Input Parameters: 4451 + A - the matrix 4452 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4453 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4454 4455 Output Parameter: 4456 . A_loc - the local sequential matrix generated 4457 4458 Level: developer 4459 4460 @*/ 4461 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4462 { 4463 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4464 PetscErrorCode ierr; 4465 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4466 IS isrowa,iscola; 4467 Mat *aloc; 4468 4469 PetscFunctionBegin; 4470 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4471 if (!row){ 4472 start = A->rmap->rstart; end = A->rmap->rend; 4473 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4474 } else { 4475 isrowa = *row; 4476 } 4477 if (!col){ 4478 start = A->cmap->rstart; 4479 cmap = a->garray; 4480 nzA = a->A->cmap->n; 4481 nzB = a->B->cmap->n; 4482 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4483 ncols = 0; 4484 for (i=0; i<nzB; i++) { 4485 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4486 else break; 4487 } 4488 imark = i; 4489 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4490 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4491 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4492 ierr = PetscFree(idx);CHKERRQ(ierr); 4493 } else { 4494 iscola = *col; 4495 } 4496 if (scall != MAT_INITIAL_MATRIX){ 4497 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4498 aloc[0] = *A_loc; 4499 } 4500 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4501 *A_loc = aloc[0]; 4502 ierr = PetscFree(aloc);CHKERRQ(ierr); 4503 if (!row){ 4504 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4505 } 4506 if (!col){ 4507 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4508 } 4509 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatGetBrowsOfAcols" 4515 /*@C 4516 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4517 4518 Collective on Mat 4519 4520 Input Parameters: 4521 + A,B - the matrices in mpiaij format 4522 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4523 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4524 4525 Output Parameter: 4526 + rowb, colb - index sets of rows and columns of B to extract 4527 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4528 - B_seq - the sequential matrix generated 4529 4530 Level: developer 4531 4532 @*/ 4533 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4534 { 4535 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4536 PetscErrorCode ierr; 4537 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4538 IS isrowb,iscolb; 4539 Mat *bseq; 4540 4541 PetscFunctionBegin; 4542 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4543 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4544 } 4545 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4546 4547 if (scall == MAT_INITIAL_MATRIX){ 4548 start = A->cmap->rstart; 4549 cmap = a->garray; 4550 nzA = a->A->cmap->n; 4551 nzB = a->B->cmap->n; 4552 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4553 ncols = 0; 4554 for (i=0; i<nzB; i++) { /* row < local row index */ 4555 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4556 else break; 4557 } 4558 imark = i; 4559 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4560 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4561 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4562 ierr = PetscFree(idx);CHKERRQ(ierr); 4563 *brstart = imark; 4564 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4565 } else { 4566 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4567 isrowb = *rowb; iscolb = *colb; 4568 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4569 bseq[0] = *B_seq; 4570 } 4571 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4572 *B_seq = bseq[0]; 4573 ierr = PetscFree(bseq);CHKERRQ(ierr); 4574 if (!rowb){ 4575 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4576 } else { 4577 *rowb = isrowb; 4578 } 4579 if (!colb){ 4580 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4581 } else { 4582 *colb = iscolb; 4583 } 4584 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 #undef __FUNCT__ 4589 #define __FUNCT__ "MatGetBrowsOfAoCols" 4590 /*@C 4591 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4592 of the OFF-DIAGONAL portion of local A 4593 4594 Collective on Mat 4595 4596 Input Parameters: 4597 + A,B - the matrices in mpiaij format 4598 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4599 . 
.  startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.  B_oth - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj = sstartsj + nsends +1;
    sstartsj[0] = 0;  rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
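      /* len is cumulative over all sends packed so far, so it also marks where the data for the next process begins in bufj and bufa */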
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends +1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
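          /* copy the numerical values of this row into the outgoing a-array buffer */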
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2)
  PetscValidPointer(colmap, 3)
  PetscValidPointer(multScatter, 4)
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "../src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = (B'*A')' since computing A*B directly is untenable

              n         p         p
            (   )     (   )     (   )
          m ( A ) * n ( B ) = m ( C )
            (   )     (   )     (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern
PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated
       if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
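   Because they are used directly, all six arrays must remain valid for the lifetime of the matrix
   and should be freed by the caller only after the matrix has been destroyed.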

   The i and j indices are 0 based

   See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix


.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version of MatSetValues() for MPIAIJ matrices, for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
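    /* mixing ADD_VALUES and INSERT_VALUES without an intervening assembly is not allowed */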
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5162 } 5163 #endif 5164 { 5165 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5166 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5167 PetscTruth roworiented = aij->roworiented; 5168 5169 /* Some Variables required in the macro */ 5170 Mat A = aij->A; 5171 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5172 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5173 MatScalar *aa = a->a; 5174 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5175 Mat B = aij->B; 5176 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5177 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5178 MatScalar *ba = b->a; 5179 5180 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5181 PetscInt nonew = a->nonew; 5182 MatScalar *ap1,*ap2; 5183 5184 PetscFunctionBegin; 5185 for (i=0; i<m; i++) { 5186 if (im[i] < 0) continue; 5187 #if defined(PETSC_USE_DEBUG) 5188 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5189 #endif 5190 if (im[i] >= rstart && im[i] < rend) { 5191 row = im[i] - rstart; 5192 lastcol1 = -1; 5193 rp1 = aj + ai[row]; 5194 ap1 = aa + ai[row]; 5195 rmax1 = aimax[row]; 5196 nrow1 = ailen[row]; 5197 low1 = 0; 5198 high1 = nrow1; 5199 lastcol2 = -1; 5200 rp2 = bj + bi[row]; 5201 ap2 = ba + bi[row]; 5202 rmax2 = bimax[row]; 5203 nrow2 = bilen[row]; 5204 low2 = 0; 5205 high2 = nrow2; 5206 5207 for (j=0; j<n; j++) { 5208 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5209 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5210 if (in[j] >= cstart && in[j] < cend){ 5211 col = in[j] - cstart; 5212 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5213 } else if (in[j] < 0) continue; 5214 #if defined(PETSC_USE_DEBUG) 5215 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5216 #endif 5217 else { 5218 if (mat->was_assembled) { 5219 if (!aij->colmap) { 5220 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5221 } 5222 #if defined (PETSC_USE_CTABLE) 5223 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5224 col--; 5225 #else 5226 col = aij->colmap[in[j]] - 1; 5227 #endif 5228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5229 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5230 col = in[j]; 5231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5232 B = aij->B; 5233 b = (Mat_SeqAIJ*)B->data; 5234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5235 rp2 = bj + bi[row]; 5236 ap2 = ba + bi[row]; 5237 rmax2 = bimax[row]; 5238 nrow2 = bilen[row]; 5239 low2 = 0; 5240 high2 = nrow2; 5241 bm = aij->B->rmap->n; 5242 ba = b->a; 5243 } 5244 } else col = in[j]; 5245 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5246 } 5247 } 5248 } else { 5249 if (!aij->donotstash) { 5250 if (roworiented) { 5251 if (ignorezeroentries && v[i*n] == 0.0) continue; 5252 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5253 } else { 5254 if (ignorezeroentries && v[i] == 0.0) continue; 5255 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5256 } 5257 } 5258 } 5259 }} 5260 PetscFunctionReturnVoid(); 5261 } 5262 EXTERN_C_END 5263 5264