#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  /* create the IS on the matrix's communicator, not PETSC_COMM_WORLD */
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
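/*
   A small caller-side sketch (hypothetical usage, not part of this file):
   keptrows is only created when some process owns an all-zero row, so test
   it before use.

      IS keptrows;
      ierr = MatFindNonZeroRows_MPIAIJ(M,&keptrows);CHKERRQ(ierr);
      if (keptrows) {
        ... restrict the linear system to the rows in keptrows ...
        ierr = ISDestroy(keptrows);CHKERRQ(ierr);
      }
*/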
/*
   Distributes a SeqAIJ matrix across a set of processes. Code stolen from
   MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

   Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* count the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* count the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr);
      gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
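/*
   A minimal calling sketch for MatDistribute_MPIAIJ() (hypothetical driver
   code): rank 0 holds a global SeqAIJ matrix gA, every process asks for m
   local rows, and a later call with MAT_REUSE_MATRIX moves over fresh
   numerical values only.

      Mat pA;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_INITIAL_MATRIX,&pA);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_REUSE_MATRIX,&pA);CHKERRQ(ierr);
*/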
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
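/*
   The two macros above are the row-insertion kernels used by
   MatSetValues_MPIAIJ() below for the diagonal (A) and off-diagonal (B)
   blocks: a short binary search narrows the window to at most five entries,
   a linear scan finds the column, and a genuinely new nonzero is inserted by
   shifting the tail of the row up one slot. A worked illustration with
   hypothetical row contents (not data from this file):

      row cols [2 5 9 14], insert col 7:  the scan stops at the first
      col > 7, entries 9 and 14 shift up one slot, giving [2 5 7 9 14].
*/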
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
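/*
   Entries destined for rows owned by another process are cached in
   mat->stash by MatSetValues_MPIAIJ() above and only communicated during
   assembly. A caller-side sketch (standard PETSc usage; indices and value
   hypothetical):

      ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/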
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
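/*
   MatZeroRows_MPIAIJ() below accepts requests for any global rows: each
   process first routes the requested row numbers to their owners (count the
   contributions per owner, post MPI_Irecv()s for the expected messages, then
   MPI_Isend() the row lists), and each owner zeroes the rows it received in
   its local blocks.
*/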
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart = A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
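/*
   MatZeroRowsColumns_MPIAIJ() below reuses the same owner-routing scheme and
   additionally eliminates the matching columns: a mask vector marking the
   zeroed rows is scattered into the ghost layout, so each process can zero
   the corresponding columns of its off-diagonal block while moving the
   diag*x contributions over to the right-hand side b.
*/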
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(xmask);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }

  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
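/*
   The multiply routines above and below all use the same splitting,
   y = A_diag*x_local + B_off*x_ghost: the scatter filling a->lvec with ghost
   values is started first, the product with the local diagonal block is done
   while the messages are in flight, and the scatter is completed before the
   off-diagonal contribution is added in.
*/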
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
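/*
   A small usage sketch for the MatIsTranspose() test implemented above
   (hypothetical caller):

      PetscBool f;
      ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);
      if (f) { ... B equals the transpose of A up to the tolerance ... }
*/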
"MatGetDiagonal_MPIAIJ" 1131 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1132 { 1133 PetscErrorCode ierr; 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 1136 PetscFunctionBegin; 1137 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1138 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1139 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatScale_MPIAIJ" 1145 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1152 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatDestroy_MPIAIJ" 1158 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1159 { 1160 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 #if defined(PETSC_USE_LOG) 1165 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1166 #endif 1167 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1168 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 1169 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 1170 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 1171 #if defined (PETSC_USE_CTABLE) 1172 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 1173 #else 1174 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1175 #endif 1176 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1177 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 1178 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 1179 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1180 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1181 ierr = PetscFree(aij);CHKERRQ(ierr); 1182 1183 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1188 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1189 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1190 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1191 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1197 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1198 { 1199 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1200 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1201 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1202 PetscErrorCode ierr; 1203 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1204 int fd; 1205 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1206 PetscInt 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr      = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr      = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space for the largest number of nonzeros any processor holds */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a  += ai[i+1]-ai[i];
      aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a    += ai[i+1]-ai[i];
      cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
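/*
   A caller-side sketch (standard PETSc usage): the viewer type, not the
   matrix type, selects the branch taken below.

      ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
      ierr = PetscViewerBinaryOpen(comm,"mat.dat",FILE_MODE_WRITE,&bviewer);CHKERRQ(ierr);
      ierr = MatView(mat,bviewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(bviewer);CHKERRQ(ierr);
*/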
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1487 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1488 if (iascii || isdraw || isbinary || issocket) { 1489 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1490 } else { 1491 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatSOR_MPIAIJ" 1498 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1499 { 1500 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1501 PetscErrorCode ierr; 1502 Vec bb1 = 0; 1503 PetscBool hasop; 1504 1505 PetscFunctionBegin; 1506 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1507 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1508 } 1509 1510 if (flag == SOR_APPLY_UPPER) { 1511 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1516 if (flag & SOR_ZERO_INITIAL_GUESS) { 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1518 its--; 1519 } 1520 1521 while (its--) { 1522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 1525 /* update rhs: bb1 = bb - B*x */ 1526 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1527 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1528 1529 /* local sweep */ 1530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1531 } 1532 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1533 if (flag & SOR_ZERO_INITIAL_GUESS) { 1534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1535 its--; 1536 } 1537 while (its--) { 1538 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1540 1541 /* update rhs: bb1 = bb - B*x */ 1542 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1543 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1544 1545 /* local sweep */ 1546 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1547 } 1548 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1549 if (flag & SOR_ZERO_INITIAL_GUESS) { 1550 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1551 its--; 1552 } 1553 while (its--) { 1554 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* update rhs: bb1 = bb - B*x */ 1558 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1559 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1560 1561 /* local sweep */ 1562 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1563 } 1564 } else if (flag & SOR_EISENSTAT) {
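/* Eisenstat's trick, as realized below: a backward local sweep with zero initial guess produces xx; xx is scattered into lvec; the right-hand side is then modified to bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec, with D applied via MatMultDiagonalBlock() when available and via the stored diagonal otherwise; a forward local sweep on bb1 yields the correction xx1, which is added to xx. This applies the SSOR preconditioner at roughly the cost of a single sweep. */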
1565 Vec xx1; 1566 1567 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1569 1570 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 if (!mat->diag) { 1573 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1574 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1575 } 1576 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1577 if (hasop) { 1578 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1579 } else { 1580 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1581 } 1582 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1583 1584 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1585 1586 /* local sweep */ 1587 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1588 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1589 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1590 } else { 1591 SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1592 } 1593 1594 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatPermute_MPIAIJ" 1600 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1601 { 1602 MPI_Comm comm,pcomm; 1603 PetscInt first,local_size,nrows; 1604 const PetscInt *rows; 1605 PetscMPIInt size; 1606 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1607 PetscErrorCode ierr; 1608 1609 PetscFunctionBegin; 1610 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1611 /* make a collective version of 'rowp' */ 1612 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1613 if (pcomm==comm) { 1614 crowp = rowp; 1615 } else { 1616 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1617 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1618 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1619 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1620 } 1621 /* collect the global row permutation and invert it */ 1622 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1623 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1624 if (pcomm!=comm) { 1625 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1626 } 1627 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); ierr = ISDestroy(growp);CHKERRQ(ierr); 1628 /* get the local target indices */ 1629 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1630 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1631 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1632 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1633 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1634 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1635 /* the column permutation is so much easier; 1636 make a local version of 'colp' and invert it */ 1637 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1638 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1639 if (size==1) { 1640 lcolp = colp; 1641 } else { 1642 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1643 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1644 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1645 } 1646 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1647 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1648 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1649 if (size>1) { 1650 ierr =
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1651 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1652 } 1653 /* now we just get the submatrix */ 1654 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1655 /* clean up */ 1656 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1657 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1663 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1664 { 1665 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1666 Mat A = mat->A,B = mat->B; 1667 PetscErrorCode ierr; 1668 PetscReal isend[5],irecv[5]; 1669 1670 PetscFunctionBegin; 1671 info->block_size = 1.0; 1672 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1673 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1674 isend[3] = info->memory; isend[4] = info->mallocs; 1675 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1676 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1677 isend[3] += info->memory; isend[4] += info->mallocs; 1678 if (flag == MAT_LOCAL) { 1679 info->nz_used = isend[0]; 1680 info->nz_allocated = isend[1]; 1681 info->nz_unneeded = isend[2]; 1682 info->memory = isend[3]; 1683 info->mallocs = isend[4]; 1684 } else if (flag == MAT_GLOBAL_MAX) { 1685 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1686 info->nz_used = irecv[0]; 1687 info->nz_allocated = irecv[1]; 1688 info->nz_unneeded = irecv[2]; 1689 info->memory = irecv[3]; 1690 info->mallocs = irecv[4]; 1691 } else if (flag == MAT_GLOBAL_SUM) { 1692 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1693 info->nz_used = irecv[0]; 1694 info->nz_allocated = irecv[1]; 1695 info->nz_unneeded = irecv[2]; 1696 info->memory = irecv[3]; 1697 info->mallocs = irecv[4]; 1698 } 1699 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1700 info->fill_ratio_needed = 0; 1701 info->factor_mallocs = 0; 1702 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatSetOption_MPIAIJ" 1708 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1709 { 1710 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1711 PetscErrorCode ierr; 1712 1713 PetscFunctionBegin; 1714 switch (op) { 1715 case MAT_NEW_NONZERO_LOCATIONS: 1716 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1717 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1718 case MAT_KEEP_NONZERO_PATTERN: 1719 case MAT_NEW_NONZERO_LOCATION_ERR: 1720 case MAT_USE_INODES: 1721 case MAT_IGNORE_ZERO_ENTRIES: 1722 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1723 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1724 break; 1725 case MAT_ROW_ORIENTED: 1726 a->roworiented = flg; 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1729 break; 1730 case MAT_NEW_DIAGONALS: 1731 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1732 break; 1733 case MAT_IGNORE_OFF_PROC_ENTRIES: 1734 a->donotstash = PETSC_TRUE; 1735 break; 1736 case MAT_SPD: 1737 A->spd_set = PETSC_TRUE; 1738 A->spd = flg; 1739 if (flg) { 1740 A->symmetric = PETSC_TRUE; 1741 A->structurally_symmetric = PETSC_TRUE; 1742 A->symmetric_set = PETSC_TRUE; 1743 A->structurally_symmetric_set = PETSC_TRUE; 1744 } 1745 break; 1746 case MAT_SYMMETRIC: 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 break; 1749 case 
MAT_STRUCTURALLY_SYMMETRIC: 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 break; 1752 case MAT_HERMITIAN: 1753 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1754 break; 1755 case MAT_SYMMETRY_ETERNAL: 1756 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1757 break; 1758 default: 1759 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1760 } 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatGetRow_MPIAIJ" 1766 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1767 { 1768 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1769 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1770 PetscErrorCode ierr; 1771 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1772 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1773 PetscInt *cmap,*idx_p; 1774 1775 PetscFunctionBegin; 1776 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1777 mat->getrowactive = PETSC_TRUE; 1778 1779 if (!mat->rowvalues && (idx || v)) { 1780 /* 1781 allocate enough space to hold information from the longest row. 1782 */ 1783 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1784 PetscInt max = 1,tmp; 1785 for (i=0; i<matin->rmap->n; i++) { 1786 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1787 if (max < tmp) { max = tmp; } 1788 } 1789 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1790 } 1791 1792 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1793 lrow = row - rstart; 1794 1795 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1796 if (!v) {pvA = 0; pvB = 0;} 1797 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1798 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1799 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1800 nztot = nzA + nzB; 1801 1802 cmap = mat->garray; 1803 if (v || idx) { 1804 if (nztot) { 1805 /* Sort by increasing column numbers, assuming A and B already sorted */ 1806 PetscInt imark = -1; 1807 if (v) { 1808 *v = v_p = mat->rowvalues; 1809 for (i=0; i<nzB; i++) { 1810 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1811 else break; 1812 } 1813 imark = i; 1814 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1815 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1816 } 1817 if (idx) { 1818 *idx = idx_p = mat->rowindices; 1819 if (imark > -1) { 1820 for (i=0; i<imark; i++) { 1821 idx_p[i] = cmap[cworkB[i]]; 1822 } 1823 } else { 1824 for (i=0; i<nzB; i++) { 1825 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1826 else break; 1827 } 1828 imark = i; 1829 } 1830 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1831 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1832 } 1833 } else { 1834 if (idx) *idx = 0; 1835 if (v) *v = 0; 1836 } 1837 } 1838 *nz = nztot; 1839 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1840 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1846 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1847 { 1848 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1849 1850 PetscFunctionBegin; 1851 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1852 aij->getrowactive = PETSC_FALSE; 1853 PetscFunctionReturn(0); 1854 } 1855
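/*
   Typical traversal served by the two routines above (a minimal sketch, not
   from this file; assumes mat is an assembled MPIAIJ matrix and that only
   locally owned rows are queried):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols[] (global column indices, increasing) and vals[] ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }

   MatGetRow() hands out the rowvalues/rowindices work arrays allocated above,
   so at most one row may be outstanding at a time; the getrowactive flag
   enforces this.
*/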
be called first"); 1852 aij->getrowactive = PETSC_FALSE; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatNorm_MPIAIJ" 1858 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1859 { 1860 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1861 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1862 PetscErrorCode ierr; 1863 PetscInt i,j,cstart = mat->cmap->rstart; 1864 PetscReal sum = 0.0; 1865 MatScalar *v; 1866 1867 PetscFunctionBegin; 1868 if (aij->size == 1) { 1869 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1870 } else { 1871 if (type == NORM_FROBENIUS) { 1872 v = amat->a; 1873 for (i=0; i<amat->nz; i++) { 1874 #if defined(PETSC_USE_COMPLEX) 1875 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1876 #else 1877 sum += (*v)*(*v); v++; 1878 #endif 1879 } 1880 v = bmat->a; 1881 for (i=0; i<bmat->nz; i++) { 1882 #if defined(PETSC_USE_COMPLEX) 1883 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1884 #else 1885 sum += (*v)*(*v); v++; 1886 #endif 1887 } 1888 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1889 *norm = sqrt(*norm); 1890 } else if (type == NORM_1) { /* max column norm */ 1891 PetscReal *tmp,*tmp2; 1892 PetscInt *jj,*garray = aij->garray; 1893 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1894 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1895 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1896 *norm = 0.0; 1897 v = amat->a; jj = amat->j; 1898 for (j=0; j<amat->nz; j++) { 1899 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1900 } 1901 v = bmat->a; jj = bmat->j; 1902 for (j=0; j<bmat->nz; j++) { 1903 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1904 } 1905 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1906 for (j=0; j<mat->cmap->N; j++) { 1907 if (tmp2[j] > *norm) *norm = tmp2[j]; 1908 } 1909 ierr = PetscFree(tmp);CHKERRQ(ierr); 1910 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1911 } else if (type == NORM_INFINITY) { /* max row norm */ 1912 PetscReal ntemp = 0.0; 1913 for (j=0; j<aij->A->rmap->n; j++) { 1914 v = amat->a + amat->i[j]; 1915 sum = 0.0; 1916 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1917 sum += PetscAbsScalar(*v); v++; 1918 } 1919 v = bmat->a + bmat->i[j]; 1920 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1921 sum += PetscAbsScalar(*v); v++; 1922 } 1923 if (sum > ntemp) ntemp = sum; 1924 } 1925 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1926 } else { 1927 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1928 } 1929 } 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatTranspose_MPIAIJ" 1935 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1936 { 1937 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1938 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1939 PetscErrorCode ierr; 1940 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1941 PetscInt cstart=A->cmap->rstart,ncol; 1942 Mat B; 1943 MatScalar *array; 1944 1945 PetscFunctionBegin; 1946 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1947 1948 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1949 ai = Aloc->i; aj = Aloc->j; 1950 bi = Bloc->i; bj = Bloc->j; 1951 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1952 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1953 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1954 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1955 for (i=0; i<ai[ma]; i++){ 1956 d_nnz[aj[i]] ++; 1957 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1958 } 1959 1960 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1961 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1962 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1964 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1965 } else { 1966 B = *matout; 1967 } 1968 1969 /* copy over the A part */ 1970 array = Aloc->a; 1971 row = A->rmap->rstart; 1972 for (i=0; i<ma; i++) { 1973 ncol = ai[i+1]-ai[i]; 1974 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1975 row++; array += ncol; aj += ncol; 1976 } 1977 aj = Aloc->j; 1978 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */ 1979 1980 /* copy over the B part */ 1981 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1982 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1983 array = Bloc->a; 1984 row = A->rmap->rstart; 1985 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1986 cols_tmp = cols; 1987 for (i=0; i<mb; i++) { 1988 ncol = bi[i+1]-bi[i]; 1989 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1990 row++; array += ncol; cols_tmp += ncol; 1991 } 1992 ierr = PetscFree(cols);CHKERRQ(ierr); 1993 1994 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1995 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1996 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1997 *matout = B; 1998 } else { 1999 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2007 { 2008 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2009 Mat a = aij->A,b = aij->B; 2010 PetscErrorCode ierr; 2011 PetscInt s1,s2,s3; 2012 2013 PetscFunctionBegin; 2014 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2015 if (rr) { 2016 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2017 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2018 /* Overlap communication with computation.
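The scatter of rr into lvec is only begun here; the left scaling of the off-diagonal block and the scaling of the diagonal block proceed while the messages are in flight, and VecScatterEnd() below completes the scatter just before the right scaling of the off-diagonal block.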
*/ 2019 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 } 2021 if (ll) { 2022 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2023 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2024 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2025 } 2026 /* scale the diagonal block */ 2027 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2028 2029 if (rr) { 2030 /* Do a scatter end and then right scale the off-diagonal block */ 2031 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2033 } 2034 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2040 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2041 { 2042 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2043 PetscErrorCode ierr; 2044 2045 PetscFunctionBegin; 2046 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2047 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2048 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2049 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2054 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2055 { 2056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2057 PetscErrorCode ierr; 2058 2059 PetscFunctionBegin; 2060 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatEqual_MPIAIJ" 2066 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2067 { 2068 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2069 Mat a,b,c,d; 2070 PetscBool flg; 2071 PetscErrorCode ierr; 2072 2073 PetscFunctionBegin; 2074 a = matA->A; b = matA->B; 2075 c = matB->A; d = matB->B; 2076 2077 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2078 if (flg) { 2079 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2080 } 2081 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatCopy_MPIAIJ" 2087 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2088 { 2089 PetscErrorCode ierr; 2090 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2091 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2092 2093 PetscFunctionBegin; 2094 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2095 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2096 /* because of the column compression in the off-processor part of the matrix a->B, 2097 the number of columns in a->B and b->B may be different, hence we cannot call 2098 the MatCopy() directly on the two parts. 
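(The off-diagonal block stores only the columns that actually contain nonzeros, renumbered compactly through garray, so two matrices with identical global nonzero patterns may still have differently sized a->B and b->B blocks.)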
If need be, we can provide a more 2099 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2100 then copying the submatrices */ 2101 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2102 } else { 2103 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2104 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2105 } 2106 PetscFunctionReturn(0); 2107 } 2108 2109 #undef __FUNCT__ 2110 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2111 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2112 { 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatAXPY_MPIAIJ" 2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2123 { 2124 PetscErrorCode ierr; 2125 PetscInt i; 2126 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2127 PetscBLASInt bnz,one=1; 2128 Mat_SeqAIJ *x,*y; 2129 2130 PetscFunctionBegin; 2131 if (str == SAME_NONZERO_PATTERN) { 2132 PetscScalar alpha = a; 2133 x = (Mat_SeqAIJ *)xx->A->data; 2134 y = (Mat_SeqAIJ *)yy->A->data; 2135 bnz = PetscBLASIntCast(x->nz); 2136 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2137 x = (Mat_SeqAIJ *)xx->B->data; 2138 y = (Mat_SeqAIJ *)yy->B->data; 2139 bnz = PetscBLASIntCast(x->nz); 2140 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2141 } else if (str == SUBSET_NONZERO_PATTERN) { 2142 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2143 2144 x = (Mat_SeqAIJ *)xx->B->data; 2145 y = (Mat_SeqAIJ *)yy->B->data; 2146 if (y->xtoy && y->XtoY != xx->B) { 2147 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2148 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2149 } 2150 if (!y->xtoy) { /* get xtoy */ 2151 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2152 y->XtoY = xx->B; 2153 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2154 } 2155 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2156 } else { 2157 Mat B; 2158 PetscInt *nnz_d,*nnz_o; 2159 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2160 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2161 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2162 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2163 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2164 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2165 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2166 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2167 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2168 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2169 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2170 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2171 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2177 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatConjugate_MPIAIJ" 2180 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2181 { 2182 #if defined(PETSC_USE_COMPLEX) 2183 PetscErrorCode ierr; 2184 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2185 2186 PetscFunctionBegin; 2187 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2188 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2189 #else 2190 PetscFunctionBegin; 2191 #endif 2192 PetscFunctionReturn(0);
2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatRealPart_MPIAIJ" 2197 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2198 { 2199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2204 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2210 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2211 { 2212 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2217 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #ifdef PETSC_HAVE_PBGL 2222 2223 #include <boost/parallel/mpi/bsp_process_group.hpp> 2224 #include <boost/graph/distributed/ilu_default_graph.hpp> 2225 #include <boost/graph/distributed/ilu_0_block.hpp> 2226 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2227 #include <boost/graph/distributed/petsc/interface.hpp> 2228 #include <boost/multi_array.hpp> 2229 #include <boost/parallel/distributed_property_map.hpp> 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2233 /* 2234 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2235 */ 2236 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2237 { 2238 namespace petsc = boost::distributed::petsc; 2239 2240 namespace graph_dist = boost::graph::distributed; 2241 using boost::graph::distributed::ilu_default::process_group_type; 2242 using boost::graph::ilu_permuted; 2243 2244 PetscBool row_identity, col_identity; 2245 PetscContainer c; 2246 PetscInt m, n, M, N; 2247 PetscErrorCode ierr; 2248 2249 PetscFunctionBegin; 2250 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2251 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2252 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2253 if (!row_identity || !col_identity) { 2254 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2255 } 2256 2257 process_group_type pg; 2258 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2259 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2260 lgraph_type& level_graph = *lgraph_p; 2261 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2262 2263 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2264 ilu_permuted(level_graph); 2265 2266 /* put together the new matrix */ 2267 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2268 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2269 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2270 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2271 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2272 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 2275 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2276 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2277 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 2279 } 2280 2281
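/* With PBGL the ILU(0) factors are computed by ilu_permuted() in MatILUFactorSymbolic_MPIAIJ() above and cached in the "graph" container on the factor matrix, so the numeric factorization below is intentionally empty. */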
#undef __FUNCT__ 2282 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2283 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2284 { 2285 PetscFunctionBegin; 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatSolve_MPIAIJ" 2291 /* 2292 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2293 */ 2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2295 { 2296 namespace graph_dist = boost::graph::distributed; 2297 2298 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2299 lgraph_type* lgraph_p; 2300 PetscContainer c; 2301 PetscErrorCode ierr; 2302 2303 PetscFunctionBegin; 2304 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2305 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2306 ierr = VecCopy(b, x);CHKERRQ(ierr); 2307 2308 PetscScalar* array_x; 2309 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2310 PetscInt sx; 2311 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2312 2313 PetscScalar* array_b; 2314 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2315 PetscInt sb; 2316 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2317 2318 lgraph_type& level_graph = *lgraph_p; 2319 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2320 2321 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2322 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2323 ref_x(array_x, boost::extents[num_vertices(graph)]); 2324 2325 typedef boost::iterator_property_map<array_ref_type::iterator, 2326 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2327 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2328 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2329 2330 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2331 2332 PetscFunctionReturn(0); 2333 } 2334 #endif 2335 2336 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2337 PetscInt nzlocal,nsends,nrecvs; 2338 PetscMPIInt *send_rank,*recv_rank; 2339 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2340 PetscScalar *sbuf_a,**rbuf_a; 2341 PetscErrorCode (*MatDestroy)(Mat); 2342 } Mat_Redundant; 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2346 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2347 { 2348 PetscErrorCode ierr; 2349 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2350 PetscInt i; 2351 2352 PetscFunctionBegin; 2353 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2354 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2355 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2356 for (i=0; i<redund->nrecvs; i++){ 2357 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2358 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2359 } 2360 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2361 ierr = PetscFree(redund);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatDestroy_MatRedundant" 2367 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2368 { 2369 PetscErrorCode ierr; 2370 PetscContainer container; 2371 Mat_Redundant *redund=PETSC_NULL; 2372 2373 PetscFunctionBegin; 2374 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2375 if (container) { 2376 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2377 } else { 2378 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2379 } 2380 A->ops->destroy = redund->MatDestroy; 2381 ierr =
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2382 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2383 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2389 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2390 { 2391 PetscMPIInt rank,size; 2392 MPI_Comm comm=((PetscObject)mat)->comm; 2393 PetscErrorCode ierr; 2394 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2395 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2396 PetscInt *rowrange=mat->rmap->range; 2397 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2398 Mat A=aij->A,B=aij->B,C=*matredundant; 2399 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2400 PetscScalar *sbuf_a; 2401 PetscInt nzlocal=a->nz+b->nz; 2402 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2403 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2404 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2405 MatScalar *aworkA,*aworkB; 2406 PetscScalar *vals; 2407 PetscMPIInt tag1,tag2,tag3,imdex; 2408 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2409 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2410 MPI_Status recv_status,*send_status; 2411 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2412 PetscInt **rbuf_j=PETSC_NULL; 2413 PetscScalar **rbuf_a=PETSC_NULL; 2414 Mat_Redundant *redund=PETSC_NULL; 2415 PetscContainer container; 2416 2417 PetscFunctionBegin; 2418 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2419 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2420 2421 if (reuse == MAT_REUSE_MATRIX) { 2422 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2423 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2424 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2425 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2426 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2427 if (container) { 2428 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2429 } else { 2430 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2431 } 2432 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2433 2434
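/* reuse: the communication pattern (peer ranks, buffer sizes, and the index buffers themselves) was cached on the matredundant matrix by the MAT_INITIAL_MATRIX call, so only the numerical values have to be packed and exchanged again */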
Wrong nzlocal"); 2433 2434 nsends = redund->nsends; 2435 nrecvs = redund->nrecvs; 2436 send_rank = redund->send_rank; 2437 recv_rank = redund->recv_rank; 2438 sbuf_nz = redund->sbuf_nz; 2439 rbuf_nz = redund->rbuf_nz; 2440 sbuf_j = redund->sbuf_j; 2441 sbuf_a = redund->sbuf_a; 2442 rbuf_j = redund->rbuf_j; 2443 rbuf_a = redund->rbuf_a; 2444 } 2445 2446 if (reuse == MAT_INITIAL_MATRIX){ 2447 PetscMPIInt subrank,subsize; 2448 PetscInt nleftover,np_subcomm; 2449 /* get the destination processors' id send_rank, nsends and nrecvs */ 2450 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2451 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2452 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2453 np_subcomm = size/nsubcomm; 2454 nleftover = size - nsubcomm*np_subcomm; 2455 nsends = 0; nrecvs = 0; 2456 for (i=0; i<size; i++){ /* i=rank*/ 2457 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2458 send_rank[nsends] = i; nsends++; 2459 recv_rank[nrecvs++] = i; 2460 } 2461 } 2462 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2463 i = size-nleftover-1; 2464 j = 0; 2465 while (j < nsubcomm - nleftover){ 2466 send_rank[nsends++] = i; 2467 i--; j++; 2468 } 2469 } 2470 2471 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2472 for (i=0; i<nleftover; i++){ 2473 recv_rank[nrecvs++] = size-nleftover+i; 2474 } 2475 } 2476 2477 /* allocate sbuf_j, sbuf_a */ 2478 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2479 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2480 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2481 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2482 2483 /* copy mat's local entries into the buffers */ 2484 if (reuse == MAT_INITIAL_MATRIX){ 2485 rownz_max = 0; 2486 rptr = sbuf_j; 2487 cols = sbuf_j + rend-rstart + 1; 2488 vals = sbuf_a; 2489 rptr[0] = 0; 2490 for (i=0; i<rend-rstart; i++){ 2491 row = i + rstart; 2492 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2493 ncols = nzA + nzB; 2494 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2495 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2496 /* load the column indices for this row into cols */ 2497 lwrite = 0; 2498 for (l=0; l<nzB; l++) { 2499 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2500 vals[lwrite] = aworkB[l]; 2501 cols[lwrite++] = ctmp; 2502 } 2503 } 2504 for (l=0; l<nzA; l++){ 2505 vals[lwrite] = aworkA[l]; 2506 cols[lwrite++] = cstart + cworkA[l]; 2507 } 2508 for (l=0; l<nzB; l++) { 2509 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2510 vals[lwrite] = aworkB[l]; 2511 cols[lwrite++] = ctmp; 2512 } 2513 } 2514 vals += ncols; 2515 cols += ncols; 2516 rptr[i+1] = rptr[i] + ncols; 2517 if (rownz_max < ncols) rownz_max = ncols; 2518 } 2519 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2520 } else { /* only copy matrix values into sbuf_a */ 2521 rptr = sbuf_j; 2522 vals = sbuf_a; 2523 rptr[0] = 0; 2524 for (i=0; i<rend-rstart; i++){ 2525 row = i + rstart; 2526 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2527 ncols = nzA + nzB; 2528 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2529 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2530 lwrite = 0; 2531 for (l=0; l<nzB; l++) { 2532 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2533 } 2534 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2535 for 
(l=0; l<nzB; l++) { 2536 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2537 } 2538 vals += ncols; 2539 rptr[i+1] = rptr[i] + ncols; 2540 } 2541 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2542 2543 /* send nzlocal to others, and recv other's nzlocal */ 2544 /*--------------------------------------------------*/ 2545 if (reuse == MAT_INITIAL_MATRIX){ 2546 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2547 s_waits2 = s_waits3 + nsends; 2548 s_waits1 = s_waits2 + nsends; 2549 r_waits1 = s_waits1 + nsends; 2550 r_waits2 = r_waits1 + nrecvs; 2551 r_waits3 = r_waits2 + nrecvs; 2552 } else { 2553 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2554 r_waits3 = s_waits3 + nsends; 2555 } 2556 2557 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2558 if (reuse == MAT_INITIAL_MATRIX){ 2559 /* get new tags to keep the communication clean */ 2560 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2561 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2562 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2563 2564 /* post receives of other's nzlocal */ 2565 for (i=0; i<nrecvs; i++){ 2566 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2567 } 2568 /* send nzlocal to others */ 2569 for (i=0; i<nsends; i++){ 2570 sbuf_nz[i] = nzlocal; 2571 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2572 } 2573 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2574 count = nrecvs; 2575 while (count) { 2576 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2577 recv_rank[imdex] = recv_status.MPI_SOURCE; 2578 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2579 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2580 2581 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2582 rbuf_nz[imdex] += i + 2; 2583 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2584 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2585 count--; 2586 } 2587 /* wait on sends of nzlocal */ 2588 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2589 /* send mat->i,j to others, and recv from other's */ 2590 /*------------------------------------------------*/ 2591 for (i=0; i<nsends; i++){ 2592 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2593 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2594 } 2595 /* wait on receives of mat->i,j */ 2596 /*------------------------------*/ 2597 count = nrecvs; 2598 while (count) { 2599 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2600 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2601 count--; 2602 } 2603 /* wait on sends of mat->i,j */ 2604 /*---------------------------*/ 2605 if (nsends) { 2606 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2607 } 2608 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2609 2610 /* post receives, send and receive mat->a */ 2611 
/*----------------------------------------*/ 2612 for (imdex=0; imdex<nrecvs; imdex++) { 2613 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2614 } 2615 for (i=0; i<nsends; i++){ 2616 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2617 } 2618 count = nrecvs; 2619 while (count) { 2620 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2621 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2622 count--; 2623 } 2624 if (nsends) { 2625 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2626 } 2627 2628 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2629 2630 /* create redundant matrix */ 2631 /*-------------------------*/ 2632 if (reuse == MAT_INITIAL_MATRIX){ 2633 /* compute rownz_max for preallocation */ 2634 for (imdex=0; imdex<nrecvs; imdex++){ 2635 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2636 rptr = rbuf_j[imdex]; 2637 for (i=0; i<j; i++){ 2638 ncols = rptr[i+1] - rptr[i]; 2639 if (rownz_max < ncols) rownz_max = ncols; 2640 } 2641 } 2642 2643 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2644 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2645 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2646 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2647 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2648 } else { 2649 C = *matredundant; 2650 } 2651 2652 /* insert local matrix entries */ 2653 rptr = sbuf_j; 2654 cols = sbuf_j + rend-rstart + 1; 2655 vals = sbuf_a; 2656 for (i=0; i<rend-rstart; i++){ 2657 row = i + rstart; 2658 ncols = rptr[i+1] - rptr[i]; 2659 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2660 vals += ncols; 2661 cols += ncols; 2662 } 2663 /* insert received matrix entries */ 2664 for (imdex=0; imdex<nrecvs; imdex++){ 2665 rstart = rowrange[recv_rank[imdex]]; 2666 rend = rowrange[recv_rank[imdex]+1]; 2667 rptr = rbuf_j[imdex]; 2668 cols = rbuf_j[imdex] + rend-rstart + 1; 2669 vals = rbuf_a[imdex]; 2670 for (i=0; i<rend-rstart; i++){ 2671 row = i + rstart; 2672 ncols = rptr[i+1] - rptr[i]; 2673 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2674 vals += ncols; 2675 cols += ncols; 2676 } 2677 } 2678 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2681 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2682 if (reuse == MAT_INITIAL_MATRIX){ 2683 PetscContainer container; 2684 *matredundant = C; 2685 /* create a supporting struct and attach it to C for reuse */ 2686 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2687 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2688 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2689 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2690 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2691 2692 redund->nzlocal = nzlocal; 2693 redund->nsends = nsends; 2694 redund->nrecvs = nrecvs; 2695 redund->send_rank = send_rank; 2696 redund->recv_rank = recv_rank; 
2697 redund->sbuf_nz = sbuf_nz; 2698 redund->rbuf_nz = rbuf_nz; 2699 redund->sbuf_j = sbuf_j; 2700 redund->sbuf_a = sbuf_a; 2701 redund->rbuf_j = rbuf_j; 2702 redund->rbuf_a = rbuf_a; 2703 2704 redund->MatDestroy = C->ops->destroy; 2705 C->ops->destroy = MatDestroy_MatRedundant; 2706 } 2707 PetscFunctionReturn(0); 2708 } 2709 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2712 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2713 { 2714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2715 PetscErrorCode ierr; 2716 PetscInt i,*idxb = 0; 2717 PetscScalar *va,*vb; 2718 Vec vtmp; 2719 2720 PetscFunctionBegin; 2721 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2722 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2723 if (idx) { 2724 for (i=0; i<A->rmap->n; i++) { 2725 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2726 } 2727 } 2728 2729 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2730 if (idx) { 2731 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2732 } 2733 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2734 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2735 2736 for (i=0; i<A->rmap->n; i++){ 2737 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2738 va[i] = vb[i]; 2739 if (idx) idx[i] = a->garray[idxb[i]]; 2740 } 2741 } 2742 2743 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2744 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2745 if (idxb) { 2746 ierr = PetscFree(idxb);CHKERRQ(ierr); 2747 } 2748 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 #undef __FUNCT__ 2753 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2754 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2755 { 2756 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2757 PetscErrorCode ierr; 2758 PetscInt i,*idxb = 0; 2759 PetscScalar *va,*vb; 2760 Vec vtmp; 2761 2762 PetscFunctionBegin; 2763 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2764 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2765 if (idx) { 2766 for (i=0; i<A->rmap->n; i++) { 2767 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2768 } 2769 } 2770 2771 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2772 if (idx) { 2773 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2774 } 2775 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2776 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2777 2778 for (i=0; i<A->rmap->n; i++){ 2779 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2780 va[i] = vb[i]; 2781 if (idx) idx[i] = a->garray[idxb[i]]; 2782 } 2783 } 2784 2785 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2786 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2787 if (idxb) { 2788 ierr = PetscFree(idxb);CHKERRQ(ierr); 2789 } 2790 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2796 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2797 { 2798 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2799 PetscInt n = A->rmap->n; 2800 PetscInt cstart = A->cmap->rstart; 2801 PetscInt *cmap = mat->garray; 2802 PetscInt *diagIdx, *offdiagIdx; 2803 Vec diagV, offdiagV; 2804 PetscScalar *a, *diagA, *offdiagA; 2805 PetscInt r; 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2810 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2811 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
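/* merge: each row's candidate from the diagonal block (local column, offset by cstart) is compared with the candidate from the off-diagonal block, whose index must be mapped back to global numbering through cmap (garray); the comparison below is done on absolute values */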
&offdiagV);CHKERRQ(ierr); 2812 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2813 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2814 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2815 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2816 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2817 for(r = 0; r < n; ++r) { 2818 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2819 a[r] = diagA[r]; 2820 idx[r] = cstart + diagIdx[r]; 2821 } else { 2822 a[r] = offdiagA[r]; 2823 idx[r] = cmap[offdiagIdx[r]]; 2824 } 2825 } 2826 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2827 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2828 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2829 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2830 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2831 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2837 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2838 { 2839 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2840 PetscInt n = A->rmap->n; 2841 PetscInt cstart = A->cmap->rstart; 2842 PetscInt *cmap = mat->garray; 2843 PetscInt *diagIdx, *offdiagIdx; 2844 Vec diagV, offdiagV; 2845 PetscScalar *a, *diagA, *offdiagA; 2846 PetscInt r; 2847 PetscErrorCode ierr; 2848 2849 PetscFunctionBegin; 2850 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2851 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2852 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2853 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2854 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2855 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2856 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2857 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2858 for(r = 0; r < n; ++r) { 2859 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2860 a[r] = diagA[r]; 2861 idx[r] = cstart + diagIdx[r]; 2862 } else { 2863 a[r] = offdiagA[r]; 2864 idx[r] = cmap[offdiagIdx[r]]; 2865 } 2866 } 2867 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2868 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2870 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2871 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2872 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2873 PetscFunctionReturn(0); 2874 } 2875 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2878 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2879 { 2880 PetscErrorCode ierr; 2881 Mat *dummy; 2882 2883 PetscFunctionBegin; 2884 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2885 *newmat = *dummy; 2886 ierr = PetscFree(dummy);CHKERRQ(ierr); 2887 PetscFunctionReturn(0); 2888 } 2889 2890 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2891 /* -------------------------------------------------------------------*/ 2892 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2893 MatGetRow_MPIAIJ, 2894 MatRestoreRow_MPIAIJ, 2895 MatMult_MPIAIJ, 2896 /* 4*/ MatMultAdd_MPIAIJ, 2897 MatMultTranspose_MPIAIJ, 2898 MatMultTransposeAdd_MPIAIJ, 2899 #ifdef PETSC_HAVE_PBGL 2900 MatSolve_MPIAIJ, 2901 #else 2902 0, 2903 #endif 2904 0, 2905 0, 2906 /*10*/ 0, 2907 0, 2908 0, 2909 MatSOR_MPIAIJ, 2910 
MatTranspose_MPIAIJ, 2911 /*15*/ MatGetInfo_MPIAIJ, 2912 MatEqual_MPIAIJ, 2913 MatGetDiagonal_MPIAIJ, 2914 MatDiagonalScale_MPIAIJ, 2915 MatNorm_MPIAIJ, 2916 /*20*/ MatAssemblyBegin_MPIAIJ, 2917 MatAssemblyEnd_MPIAIJ, 2918 MatSetOption_MPIAIJ, 2919 MatZeroEntries_MPIAIJ, 2920 /*24*/ MatZeroRows_MPIAIJ, 2921 0, 2922 #ifdef PETSC_HAVE_PBGL 2923 0, 2924 #else 2925 0, 2926 #endif 2927 0, 2928 0, 2929 /*29*/ MatSetUpPreallocation_MPIAIJ, 2930 #ifdef PETSC_HAVE_PBGL 2931 0, 2932 #else 2933 0, 2934 #endif 2935 0, 2936 0, 2937 0, 2938 /*34*/ MatDuplicate_MPIAIJ, 2939 0, 2940 0, 2941 0, 2942 0, 2943 /*39*/ MatAXPY_MPIAIJ, 2944 MatGetSubMatrices_MPIAIJ, 2945 MatIncreaseOverlap_MPIAIJ, 2946 MatGetValues_MPIAIJ, 2947 MatCopy_MPIAIJ, 2948 /*44*/ MatGetRowMax_MPIAIJ, 2949 MatScale_MPIAIJ, 2950 0, 2951 0, 2952 MatZeroRowsColumns_MPIAIJ, 2953 /*49*/ MatSetBlockSize_MPIAIJ, 2954 0, 2955 0, 2956 0, 2957 0, 2958 /*54*/ MatFDColoringCreate_MPIAIJ, 2959 0, 2960 MatSetUnfactored_MPIAIJ, 2961 MatPermute_MPIAIJ, 2962 0, 2963 /*59*/ MatGetSubMatrix_MPIAIJ, 2964 MatDestroy_MPIAIJ, 2965 MatView_MPIAIJ, 2966 0, 2967 0, 2968 /*64*/ 0, 2969 0, 2970 0, 2971 0, 2972 0, 2973 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2974 MatGetRowMinAbs_MPIAIJ, 2975 0, 2976 MatSetColoring_MPIAIJ, 2977 #if defined(PETSC_HAVE_ADIC) 2978 MatSetValuesAdic_MPIAIJ, 2979 #else 2980 0, 2981 #endif 2982 MatSetValuesAdifor_MPIAIJ, 2983 /*75*/ MatFDColoringApply_AIJ, 2984 0, 2985 0, 2986 0, 2987 0, 2988 /*80*/ 0, 2989 0, 2990 0, 2991 /*83*/ MatLoad_MPIAIJ, 2992 0, 2993 0, 2994 0, 2995 0, 2996 0, 2997 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2998 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2999 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3000 MatPtAP_Basic, 3001 MatPtAPSymbolic_MPIAIJ, 3002 /*94*/ MatPtAPNumeric_MPIAIJ, 3003 0, 3004 0, 3005 0, 3006 0, 3007 /*99*/ 0, 3008 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3009 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3010 MatConjugate_MPIAIJ, 3011 0, 3012 /*104*/MatSetValuesRow_MPIAIJ, 3013 MatRealPart_MPIAIJ, 3014 MatImaginaryPart_MPIAIJ, 3015 0, 3016 0, 3017 /*109*/0, 3018 MatGetRedundantMatrix_MPIAIJ, 3019 MatGetRowMin_MPIAIJ, 3020 0, 3021 0, 3022 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3023 0, 3024 0, 3025 0, 3026 0, 3027 /*119*/0, 3028 0, 3029 0, 3030 0, 3031 MatGetMultiProcBlock_MPIAIJ, 3032 /*124*/MatFindNonZeroRows_MPIAIJ, 3033 0, 3034 0, 3035 0, 3036 MatGetSubMatricesParallel_MPIAIJ 3037 }; 3038 3039 /* ----------------------------------------------------------------------------------------*/ 3040 3041 EXTERN_C_BEGIN 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3044 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3045 { 3046 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3047 PetscErrorCode ierr; 3048 3049 PetscFunctionBegin; 3050 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3051 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 EXTERN_C_END 3055 3056 EXTERN_C_BEGIN 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3059 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3060 { 3061 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3062 PetscErrorCode ierr; 3063 3064 PetscFunctionBegin; 3065 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3066 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 EXTERN_C_END 3070 3071 EXTERN_C_BEGIN 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3074 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3075 { 3076 
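/*
   d_nz/d_nnz preallocate the diagonal block (columns owned by this process),
   o_nz/o_nnz the off-diagonal block (all other columns); underestimating
   these forces costly mallocs during MatSetValues(). A minimal usage sketch
   (not from this file; assumes a square system with tridiagonal-like
   coupling, so at most 3 diagonal-block and 2 off-diagonal-block nonzeros
   per local row):

     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
     ierr = MatSetSizes(M,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(M,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(M,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/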
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
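/*
   A minimal sketch of the creation sequence that reaches the routine above
   (comm, m and n are illustrative placeholders):

      Mat B;

      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/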
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
    Allows sending/receiving larger messages than 2 gigabytes in a single call
*/
static int MPILong_Send(void *mess,PetscInt cnt,MPI_Datatype type,int to,int tag,MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr);
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}

static int MPILong_Recv(void *mess,PetscInt cnt,MPI_Datatype type,int from,int tag,MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  MPI_Status      status;
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr);
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}
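/*
   A minimal sketch of how the two helpers above pair up across ranks (rank,
   nz, cols, tag and comm as in MatLoad_MPIAIJ() below):

      if (!rank)          { ierr = MPILong_Send(cols,nz,MPIU_INT,1,tag,comm);CHKERRQ(ierr); }
      else if (rank == 1) { ierr = MPILong_Recv(cols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); }

   For example, nz = 600000000 gives numchunks = 3 and chunk sizes 250000000,
   250000000 and 100000000, so each individual MPI count fits in a PetscMPIInt.
*/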
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all rows */
  if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
  else m = newMat->rmap->n;                                  /* Set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
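/*
   A minimal calling sketch for the extraction above (A is an assembled parallel
   AIJ matrix; using the full local row range for both index sets is
   illustrative only):

      IS       isrow,iscol;
      Mat      S;
      PetscInt rstart,rend;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ...
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);  /* same pattern, new values */
*/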
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, then the final parallel matrix assembled by concatenating the local
  pieces. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii  = i + rstart;
    nnz = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
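/*
   A minimal sketch of the call above, using the two-process example from the
   manpage (process 0 shown; B must already be a MATMPIAIJ with 2 local rows):

      PetscInt    ii[] = {0,1,3};
      PetscInt    jj[] = {0,0,2};
      PetscScalar vv[] = {1,2,3};

      ierr = MatMPIAIJSetPreallocationCSR(B,ii,jj,vv);CHKERRQ(ierr);
*/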
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
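/*
   A minimal sketch matching the 8x8 example above on proc0 (B must already be
   a MATMPIAIJ with 3 local rows and 3 local columns):

      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/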
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
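/*
   A minimal sketch of the call above for the 3x3 example in the manpage;
   process 0 passes its two local rows (process 1 would pass i={0,3},
   j={0,1,2}, v={4,5,6} with m=1):

      PetscInt    ii[] = {0,1,3},jj[] = {0,0,2};
      PetscScalar vv[] = {1,2,3};
      Mat         A;

      ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,ii,jj,vv,&A);CHKERRQ(ierr);
*/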
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm'. i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
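/*
   A minimal sketch using the accessor above to look at the two local blocks
   (A is a MATMPIAIJ matrix; colmap translates the off-diagonal block's local
   column numbers to global ones):

      Mat      Ad,Ao;
      PetscInt *colmap;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
      ierr = MatView(Ad,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/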
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
        matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
@*/
PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    ierr = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX) {
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
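/*
   A minimal sketch of merging per-process SeqAIJ pieces with the routine above
   (each process contributes a SeqAIJ matrix localA with the same number of
   columns; note that MatMerge() destroys localA):

      Mat C;

      ierr = MatMerge(comm,localA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
*/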
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* merge is only valid when the container exists, so free it here */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "../src/mat/utils/freespace.h"
#include "petscbt.h"
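/*
   A minimal sketch of the container lookup used by the SeqsToMPI routines
   (mirrors the query in MatDestroy_MPIAIJ_SeqsToMPI() above; C is a matrix
   produced by the merge):

      PetscContainer      container;
      Mat_Merge_SeqsToMPI *merge;

      ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
      if (container) {
        ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
      }
*/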
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
        matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix container does not exist; call MatMerge_SeqsToMPISymbolic() first");
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i];       /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
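/*
   A minimal sketch of the symbolic/numeric pairing: the symbolic phase below
   builds the parallel structure once, after which the numeric phase above can
   be called repeatedly as the values of seqmat change (comm, seqmat, m and n
   are illustrative placeholders):

      Mat C;

      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,C);CHKERRQ(ierr);
*/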
aj[nextaj]){ /* bcol == acol */ 4480 ba_i[j] += aa[nextaj++]; 4481 } 4482 } 4483 4484 /* add received vals into ba */ 4485 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4486 /* i-th row */ 4487 if (i == *nextrow[k]) { 4488 anzi = *(nextai[k]+1) - *nextai[k]; 4489 aj = buf_rj[k] + *(nextai[k]); 4490 aa = abuf_r[k] + *(nextai[k]); 4491 nextaj = 0; 4492 for (j=0; nextaj<anzi; j++){ 4493 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4494 ba_i[j] += aa[nextaj++]; 4495 } 4496 } 4497 nextrow[k]++; nextai[k]++; 4498 } 4499 } 4500 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4501 } 4502 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4503 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4504 4505 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4506 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4507 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4508 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4509 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4515 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4516 { 4517 PetscErrorCode ierr; 4518 Mat B_mpi; 4519 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4520 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4521 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4522 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4523 PetscInt len,proc,*dnz,*onz; 4524 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4525 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4526 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4527 MPI_Status *status; 4528 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4529 PetscBT lnkbt; 4530 Mat_Merge_SeqsToMPI *merge; 4531 PetscContainer container; 4532 4533 PetscFunctionBegin; 4534 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4535 4536 /* make sure it is a PETSc comm */ 4537 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4538 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4539 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4540 4541 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4542 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4543 4544 /* determine row ownership */ 4545 /*---------------------------------------------------------*/ 4546 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4547 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4548 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4549 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4550 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4551 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4552 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4553 4554 m = merge->rowmap->n; 4555 M = merge->rowmap->N; 4556 owners = merge->rowmap->range; 4557 4558 /* determine the number of messages to send, their lengths */ 4559 /*---------------------------------------------------------*/ 4560 len_s = merge->len_s; 4561 4562 len = 0; /* length of buf_si[] */ 4563 merge->nsend = 0; 4564 for (proc=0; proc<size; proc++){ 4565 len_si[proc] = 0; 4566 if (proc == rank){ 4567 len_s[proc] = 0; 4568 } else { 4569 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4570 len_s[proc] = ai[owners[proc+1]] - 
ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4571 } 4572 if (len_s[proc]) { 4573 merge->nsend++; 4574 nrows = 0; 4575 for (i=owners[proc]; i<owners[proc+1]; i++){ 4576 if (ai[i+1] > ai[i]) nrows++; 4577 } 4578 len_si[proc] = 2*(nrows+1); 4579 len += len_si[proc]; 4580 } 4581 } 4582 4583 /* determine the number and length of messages to receive for ij-structure */ 4584 /*-------------------------------------------------------------------------*/ 4585 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4586 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4587 4588 /* post the Irecv of j-structure */ 4589 /*-------------------------------*/ 4590 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4591 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4592 4593 /* post the Isend of j-structure */ 4594 /*--------------------------------*/ 4595 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4596 4597 for (proc=0, k=0; proc<size; proc++){ 4598 if (!len_s[proc]) continue; 4599 i = owners[proc]; 4600 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4601 k++; 4602 } 4603 4604 /* receives and sends of j-structure are complete */ 4605 /*------------------------------------------------*/ 4606 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4607 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4608 4609 /* send and recv i-structure */ 4610 /*---------------------------*/ 4611 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4612 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4613 4614 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4615 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4616 for (proc=0,k=0; proc<size; proc++){ 4617 if (!len_s[proc]) continue; 4618 /* form outgoing message for i-structure: 4619 buf_si[0]: nrows to be sent 4620 [1:nrows]: row index (global) 4621 [nrows+1:2*nrows+1]: i-structure index 4622 */ 4623 /*-------------------------------------------*/ 4624 nrows = len_si[proc]/2 - 1; 4625 buf_si_i = buf_si + nrows+1; 4626 buf_si[0] = nrows; 4627 buf_si_i[0] = 0; 4628 nrows = 0; 4629 for (i=owners[proc]; i<owners[proc+1]; i++){ 4630 anzi = ai[i+1] - ai[i]; 4631 if (anzi) { 4632 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4633 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4634 nrows++; 4635 } 4636 } 4637 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4638 k++; 4639 buf_si += len_si[proc]; 4640 } 4641 4642 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4643 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4644 4645 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4646 for (i=0; i<merge->nrecv; i++){ 4647 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4648 } 4649 4650 ierr = PetscFree(len_si);CHKERRQ(ierr); 4651 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4652 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4653 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4654 ierr = 
PetscFree(ri_waits);CHKERRQ(ierr);
4655 ierr = PetscFree(buf_s);CHKERRQ(ierr);
4656 ierr = PetscFree(status);CHKERRQ(ierr);
4657
4658 /* compute a local seq matrix in each processor */
4659 /*----------------------------------------------*/
4660 /* allocate bi array and free space for accumulating nonzero column info */
4661 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4662 bi[0] = 0;
4663
4664 /* create and initialize a linked list */
4665 nlnk = N+1;
4666 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4667
4668 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4669 len = ai[owners[rank+1]] - ai[owners[rank]];
4670 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4671 current_space = free_space;
4672
4673 /* determine symbolic info for each local row */
4674 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4675
4676 for (k=0; k<merge->nrecv; k++){
4677 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4678 nrows = *buf_ri_k[k];
4679 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4680 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4681 }
4682
4684 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4685 len = 0;
4686 for (i=0;i<m;i++) {
4687 bnzi = 0;
4688 /* add local non-zero cols of this proc's seqmat into lnk */
4689 arow = owners[rank] + i;
4690 anzi = ai[arow+1] - ai[arow];
4691 aj = a->j + ai[arow];
4692 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4693 bnzi += nlnk;
4694 /* add received col data into lnk */
4695 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4696 if (i == *nextrow[k]) { /* i-th row */
4697 anzi = *(nextai[k]+1) - *nextai[k];
4698 aj = buf_rj[k] + *nextai[k];
4699 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4700 bnzi += nlnk;
4701 nextrow[k]++; nextai[k]++;
4702 }
4703 }
4704 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4705
4706 /* if free space is not available, make more free space */
4707 if (current_space->local_remaining<bnzi) {
4708 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4709 nspacedouble++;
4710 }
4711 /* copy data into free space, then initialize lnk */
4712 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4713 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4714
4715 current_space->array += bnzi;
4716 current_space->local_used += bnzi;
4717 current_space->local_remaining -= bnzi;
4718
4719 bi[i+1] = bi[i] + bnzi;
4720 }
4721
4722 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4723
4724 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4725 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4726 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4727
4728 /* create symbolic parallel matrix B_mpi */
4729 /*---------------------------------------*/
4730 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4731 if (n==PETSC_DECIDE) {
4732 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4733 } else {
4734 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4735 }
4736 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4737 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4738 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4739
/* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4741 B_mpi->assembled = PETSC_FALSE;
4742 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4743 merge->bi = bi;
4744 merge->bj = bj;
4745 merge->buf_ri = buf_ri;
4746 merge->buf_rj = buf_rj;
4747 merge->coi = PETSC_NULL;
4748 merge->coj = PETSC_NULL;
4749 merge->owners_co = PETSC_NULL;
4750
4751 /* attach the supporting struct to B_mpi for reuse */
4752 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4753 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4754 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4755 *mpimat = B_mpi;
4756
4757 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4758 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4759 PetscFunctionReturn(0);
4760 }
4761
4762 #undef __FUNCT__
4763 #define __FUNCT__ "MatMerge_SeqsToMPI"
4764 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4765 {
4766 PetscErrorCode ierr;
4767
4768 PetscFunctionBegin;
4769 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4770 if (scall == MAT_INITIAL_MATRIX){
4771 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4772 }
4773 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4774 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4775 PetscFunctionReturn(0);
4776 }
4777
4778 #undef __FUNCT__
4779 #define __FUNCT__ "MatGetLocalMat"
4780 /*@
4781 MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix
4782
4783 Not Collective
4784
4785 Input Parameters:
4786 + A - the matrix
4787 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4788
4789 Output Parameter:
4790 . A_loc - the local sequential matrix generated
4791
4792 Level: developer
4793
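Example usage, a minimal sketch (A is an assembled MPIAIJ matrix; Aloc concatenates the diagonal and off-diagonal parts of A's local rows, with global column indices):
.vb
   Mat Aloc;
   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
   /* if only the numerical values of A change, the structure can be reused */
   ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
   ierr = MatDestroy(Aloc);CHKERRQ(ierr);
.ve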
@*/
4795 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4796 {
4797 PetscErrorCode ierr;
4798 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4799 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4800 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4801 MatScalar *aa=a->a,*ba=b->a,*cam;
4802 PetscScalar *ca;
4803 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4804 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4805
4806 PetscFunctionBegin;
4807 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4808 if (scall == MAT_INITIAL_MATRIX){
4809 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4810 ci[0] = 0;
4811 for (i=0; i<am; i++){
4812 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4813 }
4814 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4815 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4816 k = 0;
4817 for (i=0; i<am; i++) {
4818 ncols_o = bi[i+1] - bi[i];
4819 ncols_d = ai[i+1] - ai[i];
4820 /* off-diagonal portion of A, global columns left of the diagonal block */
4821 for (jo=0; jo<ncols_o; jo++) {
4822 col = cmap[*bj];
4823 if (col >= cstart) break;
4824 cj[k] = col; bj++;
4825 ca[k++] = *ba++;
4826 }
4827 /* diagonal portion of A */
4828 for (j=0; j<ncols_d; j++) {
4829 cj[k] = cstart + *aj++;
4830 ca[k++] = *aa++;
4831 }
4832 /* off-diagonal portion of A, global columns right of the diagonal block */
4833 for (j=jo; j<ncols_o; j++) {
4834 cj[k] = cmap[*bj++];
4835 ca[k++] = *ba++;
4836 }
4837 }
4838 /* put together the new matrix */
4839 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4840 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4841 /* Since these are PETSc arrays, change flags to free them as necessary. */
4842 mat = (Mat_SeqAIJ*)(*A_loc)->data;
4843 mat->free_a = PETSC_TRUE;
4844 mat->free_ij = PETSC_TRUE;
4845 mat->nonew = 0;
4846 } else if (scall == MAT_REUSE_MATRIX){
4847 mat=(Mat_SeqAIJ*)(*A_loc)->data;
4848 ci = mat->i; cj = mat->j; cam = mat->a;
4849 for (i=0; i<am; i++) {
4850 /* off-diagonal portion of A, columns left of the diagonal block */
4851 ncols_o = bi[i+1] - bi[i];
4852 for (jo=0; jo<ncols_o; jo++) {
4853 col = cmap[*bj];
4854 if (col >= cstart) break;
4855 *cam++ = *ba++; bj++;
4856 }
4857 /* diagonal portion of A */
4858 ncols_d = ai[i+1] - ai[i];
4859 for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4860 /* off-diagonal portion of A, columns right of the diagonal block */
4861 for (j=jo; j<ncols_o; j++) {
4862 *cam++ = *ba++; bj++;
4863 }
4864 }
4865 } else {
4866 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4867 }
4868
4869 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4870 PetscFunctionReturn(0);
4871 }
4872
4873 #undef __FUNCT__
4874 #define __FUNCT__ "MatGetLocalMatCondensed"
4875 /*@C
4876 MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of a parallel matrix
4877
4878 Not Collective
4879
4880 Input Parameters:
4881 + A - the matrix
4882 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4883 - row, col - index sets of rows and columns to extract (or PETSC_NULL to take all local rows and all nonzero columns)
4884
4885 Output Parameter:
4886 . A_loc - the local sequential matrix generated
4887
4888 Level: developer
4889
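Example usage, a minimal sketch (passing PETSC_NULL for row and col selects all local rows and the columns in which they have nonzeros):
.vb
   Mat Aloc;
   ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
   /* with unchanged nonzero structure the extraction can be repeated in place */
   ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
   ierr = MatDestroy(Aloc);CHKERRQ(ierr);
.ve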
@*/
4891 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4892 {
4893 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4894 PetscErrorCode ierr;
4895 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4896 IS isrowa,iscola;
4897 Mat *aloc;
4898
4899 PetscFunctionBegin;
4900 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4901 if (!row){
4902 start = A->rmap->rstart; end = A->rmap->rend;
4903 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4904 } else {
4905 isrowa = *row;
4906 }
4907 if (!col){
4908 start = A->cmap->rstart;
4909 cmap = a->garray;
4910 nzA = a->A->cmap->n;
4911 nzB = a->B->cmap->n;
4912 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4913 ncols = 0;
4914 for (i=0; i<nzB; i++) {
4915 if (cmap[i] < start) idx[ncols++] = cmap[i];
4916 else break;
4917 }
4918 imark = i;
4919 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4920 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4921 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4922 } else {
4923 iscola = *col;
4924 }
4925 if (scall != MAT_INITIAL_MATRIX){
4926 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4927 aloc[0] = *A_loc;
4928 }
4929 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4930 *A_loc = aloc[0];
4931 ierr = PetscFree(aloc);CHKERRQ(ierr);
4932 if (!row){
4933 ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4934 }
4935 if (!col){
4936 ierr = ISDestroy(iscola);CHKERRQ(ierr);
4937 }
4938 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4939 PetscFunctionReturn(0);
4940 }
4941
4942 #undef __FUNCT__
4943 #define __FUNCT__ "MatGetBrowsOfAcols"
4944 /*@C
4945 MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4946
4947 Collective on Mat
4948
4949 Input Parameters:
4950 + A,B - the matrices in mpiaij format
4951 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4952 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4953
4954 Output Parameters:
4955 + rowb, colb - index sets of the rows and columns of B that were extracted
4956 . brstart - the row of B_seq at which B's own B->rmap->n local rows begin
4957 - B_seq - the sequential matrix generated
4958
4959 Level: developer
4960
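Example usage, a minimal sketch (A and B are MPIAIJ with compatible layouts; the index sets created by the first call must be passed back when reusing):
.vb
   IS rowb,colb;
   PetscInt brstart;
   Mat Bseq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
   /* with new values in B but the same nonzero pattern */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
.ve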
4961 @*/
4962 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4963 {
4964 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4965 PetscErrorCode ierr;
4966 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4967 IS isrowb,iscolb;
4968 Mat *bseq;
4969
4970 PetscFunctionBegin;
4971 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4972 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4973 }
4974 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4975
4976 if (scall == MAT_INITIAL_MATRIX){
4977 start = A->cmap->rstart;
4978 cmap = a->garray;
4979 nzA = a->A->cmap->n;
4980 nzB = a->B->cmap->n;
4981 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4982 ncols = 0;
4983 for (i=0; i<nzB; i++) { /* row < local row index */
4984 if (cmap[i] < start) idx[ncols++] = cmap[i];
4985 else break;
4986 }
4987 imark = i;
4988 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
4989 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4990 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4991 *brstart = imark;
4992 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4993 } else {
4994 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4995 isrowb = *rowb; iscolb = *colb;
4996 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4997 bseq[0] = *B_seq;
4998 }
4999 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5000 *B_seq = bseq[0];
5001 ierr = PetscFree(bseq);CHKERRQ(ierr);
5002 if (!rowb){
5003 ierr = ISDestroy(isrowb);CHKERRQ(ierr);
5004 } else {
5005 *rowb = isrowb;
5006 }
5007 if (!colb){
5008 ierr = ISDestroy(iscolb);CHKERRQ(ierr);
5009 } else {
5010 *colb = iscolb;
5011 }
5012 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5013 PetscFunctionReturn(0);
5014 }
5015
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatGetBrowsOfAoCols"
5018 /*@C
5019 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
5020 of the OFF-DIAGONAL portion of the local part of A
5021
5022 Collective on Mat
5023
5024 Input Parameters:
5025 + A,B - the matrices in mpiaij format
5026 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5027 . startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5028 . startsj_r - similar to startsj, for the receives
5029 - bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5030
5031 Output Parameter:
5032 . B_oth - the sequential matrix generated
5033
5034 Level: developer
5035
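Example usage, a minimal sketch (the arrays returned by the first call must be passed back unchanged when reusing):
.vb
   PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
   MatScalar *bufa = PETSC_NULL;
   Mat       Both;
   ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
   /* with new values in B but the same nonzero pattern */
   ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
.ve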
@*/
5037 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5038 {
5039 VecScatter_MPI_General *gen_to,*gen_from;
5040 PetscErrorCode ierr;
5041 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5042 Mat_SeqAIJ *b_oth;
5043 VecScatter ctx=a->Mvctx;
5044 MPI_Comm comm=((PetscObject)ctx)->comm;
5045 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5046 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5047 PetscScalar *rvalues,*svalues;
5048 MatScalar *b_otha,*bufa,*bufA;
5049 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5050 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5051 MPI_Status *sstatus,rstatus;
5052 PetscMPIInt jj;
5053 PetscInt *cols,sbs,rbs;
5054 PetscScalar *vals;
5055
5056 PetscFunctionBegin;
5057 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5058 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5059 }
5060 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5062
5063 gen_to = (VecScatter_MPI_General*)ctx->todata;
5064 gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5065 rvalues = gen_from->values; /* scatter value array, reused as buffer for incoming row lengths */
5066 svalues = gen_to->values; /* scatter value array, reused as buffer for outgoing row lengths */
5067 nrecvs = gen_from->n;
5068 nsends = gen_to->n;
5069
5070 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5071 srow = gen_to->indices; /* local row index to be sent */
5072 sstarts = gen_to->starts;
5073 sprocs = gen_to->procs;
5074 sstatus = gen_to->sstatus;
5075 sbs = gen_to->bs;
5076 rstarts = gen_from->starts;
5077 rprocs = gen_from->procs;
5078 rbs = gen_from->bs;
5079
5080 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5081 if (scall == MAT_INITIAL_MATRIX){
5082 /* i-array */
5083 /*---------*/
5084 /* post receives */
5085 for (i=0; i<nrecvs; i++){
5086 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5087 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5088 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5089 }
5090
5091 /* pack the outgoing message */
5092 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5093 sstartsj[0] = 0; rstartsj[0] = 0;
5094 len = 0; /* total length of j or a array to be sent */
5095 k = 0;
5096 for (i=0; i<nsends; i++){
5097 rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5098 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5099 for (j=0; j<nrows; j++) {
5100 row = srow[k] + B->rmap->range[rank]; /* global row idx */
5101 for (l=0; l<sbs; l++){
5102 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5103 rowlen[j*sbs+l] = ncols;
5104 len += ncols;
5105 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5106 }
5107 k++;
5108 }
5109 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5110 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5111 }
5112 /* recvs and sends of i-array are completed */
5113 i = nrecvs;
5114 while (i--) {
5115 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5116 }
5117 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5118
5119 /* allocate buffers for sending j and a arrays */
5120 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5121 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5122
5123 /* create i-array of B_oth */
5124 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5125 b_othi[0] = 0;
5126 len = 0; /* total length of j or a array to be received */
5127 k = 0;
5128 for (i=0; i<nrecvs; i++){
5129 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5130 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5131 for (j=0; j<nrows; j++) {
5132 b_othi[k+1] = b_othi[k] + rowlen[j];
5133 len += rowlen[j]; k++;
5134 }
5135 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5136 }
5137
5138 /* allocate space for j and a arrays of B_oth */
5139 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5140 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5141
5142 /* j-array */
5143 /*---------*/
5144 /* post receives of j-array */
5145 for (i=0; i<nrecvs; i++){
5146 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5147 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5148 }
5149
5150 /* pack the outgoing message j-array */
5151 k = 0;
5152 for (i=0; i<nsends; i++){
5153 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5154 bufJ = bufj+sstartsj[i];
5155 for (j=0; j<nrows; j++) {
5156 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5157 for (ll=0; ll<sbs; ll++){
5158 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5159 for (l=0; l<ncols; l++){
5160 *bufJ++ = cols[l];
5161 }
5162 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5163 }
5164 }
5165 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5166 }
5167
5168 /* recvs and sends of j-array are completed */
5169 i = nrecvs;
5170 while (i--) {
5171 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5172 }
5173 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5174 } else if (scall == MAT_REUSE_MATRIX){
5175 sstartsj = *startsj;
5176 rstartsj = *startsj_r;
5177 bufa = *bufa_ptr;
5178 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5179 b_otha = b_oth->a;
5180 } else {
5181 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5182 }
5183
5184 /* a-array */
5185 /*---------*/
5186 /* post receives of a-array */
5187 for (i=0; i<nrecvs; i++){
5188 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5189 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5190 }
5191
5192 /* pack the outgoing message a-array */
5193 k = 0;
5194 for (i=0; i<nsends; i++){
5195 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5196 bufA = bufa+sstartsj[i];
5197 for (j=0; j<nrows; j++) {
5198 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5199 for (ll=0; ll<sbs; ll++){
5200 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5201 for (l=0; l<ncols; l++){
5202 *bufA++ = vals[l];
5203 }
5204 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5205 } 5206 } 5207 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5208 } 5209 /* recvs and sends of a-array are completed */ 5210 i = nrecvs; 5211 while (i--) { 5212 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5213 } 5214 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5215 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5216 5217 if (scall == MAT_INITIAL_MATRIX){ 5218 /* put together the new matrix */ 5219 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5220 5221 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5222 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5223 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5224 b_oth->free_a = PETSC_TRUE; 5225 b_oth->free_ij = PETSC_TRUE; 5226 b_oth->nonew = 0; 5227 5228 ierr = PetscFree(bufj);CHKERRQ(ierr); 5229 if (!startsj || !bufa_ptr){ 5230 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5231 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5232 } else { 5233 *startsj = sstartsj; 5234 *startsj_r = rstartsj; 5235 *bufa_ptr = bufa; 5236 } 5237 } 5238 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5239 PetscFunctionReturn(0); 5240 } 5241 5242 #undef __FUNCT__ 5243 #define __FUNCT__ "MatGetCommunicationStructs" 5244 /*@C 5245 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5246 5247 Not Collective 5248 5249 Input Parameters: 5250 . A - The matrix in mpiaij format 5251 5252 Output Parameter: 5253 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5254 . 
colmap - A map from global column index to local index into lvec 5255 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5256 5257 Level: developer 5258 5259 @*/ 5260 #if defined (PETSC_USE_CTABLE) 5261 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5262 #else 5263 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5264 #endif 5265 { 5266 Mat_MPIAIJ *a; 5267 5268 PetscFunctionBegin; 5269 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5270 PetscValidPointer(lvec, 2); 5271 PetscValidPointer(colmap, 3); 5272 PetscValidPointer(multScatter, 4); 5273 a = (Mat_MPIAIJ *) A->data; 5274 if (lvec) *lvec = a->lvec; 5275 if (colmap) *colmap = a->colmap; 5276 if (multScatter) *multScatter = a->Mvctx; 5277 PetscFunctionReturn(0); 5278 } 5279 5280 EXTERN_C_BEGIN 5281 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5282 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5283 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5284 EXTERN_C_END 5285 5286 #undef __FUNCT__ 5287 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5288 /* 5289 Computes (B'*A')' since computing B*A directly is untenable 5290 5291 n p p 5292 ( ) ( ) ( ) 5293 m ( A ) * n ( B ) = m ( C ) 5294 ( ) ( ) ( ) 5295 5296 */ 5297 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5298 { 5299 PetscErrorCode ierr; 5300 Mat At,Bt,Ct; 5301 5302 PetscFunctionBegin; 5303 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5304 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5305 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5306 ierr = MatDestroy(At);CHKERRQ(ierr); 5307 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5308 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5309 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5310 PetscFunctionReturn(0); 5311 } 5312 5313 #undef __FUNCT__ 5314 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5315 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5316 { 5317 PetscErrorCode ierr; 5318 PetscInt m=A->rmap->n,n=B->cmap->n; 5319 Mat Cmat; 5320 5321 PetscFunctionBegin; 5322 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5323 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5324 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5325 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5326 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5327 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5328 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5329 *C = Cmat; 5330 PetscFunctionReturn(0); 5331 } 5332 5333 /* ----------------------------------------------------------------*/ 5334 #undef __FUNCT__ 5335 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5336 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5337 { 5338 PetscErrorCode ierr; 5339 5340 PetscFunctionBegin; 5341 if (scall == MAT_INITIAL_MATRIX){ 5342 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5343 } 5344 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5345 PetscFunctionReturn(0); 5346 } 5347 5348 EXTERN_C_BEGIN 5349 #if defined(PETSC_HAVE_MUMPS) 5350 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5351 #endif 5352 #if defined(PETSC_HAVE_PASTIX) 5353 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5354 #endif 5355 #if defined(PETSC_HAVE_SUPERLU_DIST) 5356 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5357 #endif 5358 #if defined(PETSC_HAVE_SPOOLES) 5359 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5360 #endif 5361 EXTERN_C_END 5362 5363 /*MC 5364 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5365 5366 Options Database Keys: 5367 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5368 5369 Level: beginner 5370 5371 .seealso: MatCreateMPIAIJ() 5372 M*/ 5373 5374 EXTERN_C_BEGIN 5375 #undef __FUNCT__ 5376 #define __FUNCT__ "MatCreate_MPIAIJ" 5377 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5378 { 5379 Mat_MPIAIJ *b; 5380 PetscErrorCode ierr; 5381 PetscMPIInt size; 5382 5383 PetscFunctionBegin; 5384 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5385 5386 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5387 B->data = (void*)b; 5388 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5389 B->rmap->bs = 1; 5390 B->assembled = PETSC_FALSE; 5391 5392 B->insertmode = NOT_SET_VALUES; 5393 b->size = size; 5394 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5395 5396 /* build cache for off array entries formed */ 5397 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5398 b->donotstash = PETSC_FALSE; 5399 b->colmap = 0; 5400 b->garray = 0; 5401 b->roworiented = PETSC_TRUE; 5402 5403 /* stuff used for matrix vector multiply */ 5404 b->lvec = PETSC_NULL; 5405 b->Mvctx = PETSC_NULL; 5406 5407 /* stuff for MatGetRow() */ 5408 b->rowindices = 0; 5409 b->rowvalues = 0; 5410 b->getrowactive = PETSC_FALSE; 5411 5412 #if defined(PETSC_HAVE_SPOOLES) 5413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5414 "MatGetFactor_mpiaij_spooles", 5415 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5416 #endif 5417 #if defined(PETSC_HAVE_MUMPS) 5418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5419 "MatGetFactor_aij_mumps", 5420 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5421 #endif 5422 #if defined(PETSC_HAVE_PASTIX) 5423 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5424 "MatGetFactor_mpiaij_pastix", 5425 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5426 #endif 5427 #if defined(PETSC_HAVE_SUPERLU_DIST) 5428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5429 "MatGetFactor_mpiaij_superlu_dist", 5430 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5431 #endif 5432 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5433 "MatStoreValues_MPIAIJ", 5434 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5435 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5436 "MatRetrieveValues_MPIAIJ", 5437 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5438 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5439 "MatGetDiagonalBlock_MPIAIJ", 5440 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5441 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5442 "MatIsTranspose_MPIAIJ", 5443 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5444 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5445 "MatMPIAIJSetPreallocation_MPIAIJ", 5446 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5447 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5448 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5449 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5450 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5451 "MatDiagonalScaleLocal_MPIAIJ", 5452 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5453 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5454 "MatConvert_MPIAIJ_MPIAIJPERM", 5455 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5456 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5457 "MatConvert_MPIAIJ_MPIAIJCRL", 5458 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5459 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5460 "MatConvert_MPIAIJ_MPISBAIJ", 5461 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5462 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5463 "MatMatMult_MPIDense_MPIAIJ", 5464 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5465 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5466 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5467 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5468 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5469 "MatMatMultNumeric_MPIDense_MPIAIJ", 5470 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5471 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5472 PetscFunctionReturn(0); 5473 } 5474 EXTERN_C_END 5475 5476 #undef __FUNCT__ 5477 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5478 /*@ 5479 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5480 and "off-diagonal" part of the matrix in CSR format. 5481 5482 Collective on MPI_Comm 5483 5484 Input Parameters: 5485 + comm - MPI communicator 5486 . m - number of local rows (Cannot be PETSC_DECIDE) 5487 . n - This value should be the same as the local size used in creating the 5488 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5489 calculated if N is given) For square matrices n is almost always m. 5490 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5491 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5492 . i - row indices for "diagonal" portion of matrix 5493 . j - column indices 5494 . a - matrix values 5495 . oi - row indices for "off-diagonal" portion of matrix 5496 . oj - column indices 5497 - oa - matrix values 5498 5499 Output Parameter: 5500 . mat - the matrix 5501 5502 Level: advanced 5503 5504 Notes: 5505 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5506 5507 The i and j indices are 0 based 5508 5509 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5510 5511 This sets local rows and cannot be used to set off-processor values. 5512 5513 You cannot later use MatSetValues() to change values in this matrix. 
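Example usage, a hypothetical sketch for the process owning rows 0 and 1 of a matrix with 10 global columns whose diagonal block covers columns 0-1 (the arrays are not copied and must outlive the matrix):
.vb
   PetscInt    i[3]  = {0,1,2},  j[2]  = {0,1};  /* diagonal part, local column indices */
   PetscScalar a[2]  = {1.0,2.0};
   PetscInt    oi[3] = {0,1,2},  oj[2] = {7,9};  /* off-diagonal part, global column indices */
   PetscScalar oa[2] = {3.0,4.0};
   Mat         A;
   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,10,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve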
5514 5515 .keywords: matrix, aij, compressed row, sparse, parallel 5516 5517 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5518 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5519 @*/ 5520 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5521 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5522 { 5523 PetscErrorCode ierr; 5524 Mat_MPIAIJ *maij; 5525 5526 PetscFunctionBegin; 5527 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5528 if (i[0]) { 5529 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5530 } 5531 if (oi[0]) { 5532 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5533 } 5534 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5535 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5536 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5537 maij = (Mat_MPIAIJ*) (*mat)->data; 5538 maij->donotstash = PETSC_TRUE; 5539 (*mat)->preallocated = PETSC_TRUE; 5540 5541 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5542 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5543 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5544 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5545 5546 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5547 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5548 5549 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5550 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5551 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5552 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5553 5554 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5555 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5556 PetscFunctionReturn(0); 5557 } 5558 5559 /* 5560 Special version for direct calls from Fortran 5561 */ 5562 #include "private/fortranimpl.h" 5563 5564 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5565 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5566 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5567 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5568 #endif 5569 5570 /* Change these macros so can be used in void function */ 5571 #undef CHKERRQ 5572 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5573 #undef SETERRQ2 5574 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5575 #undef SETERRQ 5576 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5577 5578 EXTERN_C_BEGIN 5579 #undef __FUNCT__ 5580 #define __FUNCT__ "matsetvaluesmpiaij_" 5581 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5582 { 5583 Mat mat = *mmat; 5584 PetscInt m = *mm, n = *mn; 5585 InsertMode addv = *maddv; 5586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5587 PetscScalar value; 5588 PetscErrorCode ierr; 5589 5590 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5591 if (mat->insertmode == NOT_SET_VALUES) { 5592 mat->insertmode = addv; 5593 } 5594 #if defined(PETSC_USE_DEBUG) 5595 else if (mat->insertmode != addv) { 5596 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5597 } 5598 #endif 5599 { 5600 
PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5601 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5602 PetscBool roworiented = aij->roworiented; 5603 5604 /* Some Variables required in the macro */ 5605 Mat A = aij->A; 5606 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5607 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5608 MatScalar *aa = a->a; 5609 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5610 Mat B = aij->B; 5611 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5612 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5613 MatScalar *ba = b->a; 5614 5615 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5616 PetscInt nonew = a->nonew; 5617 MatScalar *ap1,*ap2; 5618 5619 PetscFunctionBegin; 5620 for (i=0; i<m; i++) { 5621 if (im[i] < 0) continue; 5622 #if defined(PETSC_USE_DEBUG) 5623 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5624 #endif 5625 if (im[i] >= rstart && im[i] < rend) { 5626 row = im[i] - rstart; 5627 lastcol1 = -1; 5628 rp1 = aj + ai[row]; 5629 ap1 = aa + ai[row]; 5630 rmax1 = aimax[row]; 5631 nrow1 = ailen[row]; 5632 low1 = 0; 5633 high1 = nrow1; 5634 lastcol2 = -1; 5635 rp2 = bj + bi[row]; 5636 ap2 = ba + bi[row]; 5637 rmax2 = bimax[row]; 5638 nrow2 = bilen[row]; 5639 low2 = 0; 5640 high2 = nrow2; 5641 5642 for (j=0; j<n; j++) { 5643 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5644 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5645 if (in[j] >= cstart && in[j] < cend){ 5646 col = in[j] - cstart; 5647 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5648 } else if (in[j] < 0) continue; 5649 #if defined(PETSC_USE_DEBUG) 5650 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5651 #endif 5652 else { 5653 if (mat->was_assembled) { 5654 if (!aij->colmap) { 5655 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5656 } 5657 #if defined (PETSC_USE_CTABLE) 5658 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5659 col--; 5660 #else 5661 col = aij->colmap[in[j]] - 1; 5662 #endif 5663 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5664 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5665 col = in[j]; 5666 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5667 B = aij->B; 5668 b = (Mat_SeqAIJ*)B->data; 5669 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5670 rp2 = bj + bi[row]; 5671 ap2 = ba + bi[row]; 5672 rmax2 = bimax[row]; 5673 nrow2 = bilen[row]; 5674 low2 = 0; 5675 high2 = nrow2; 5676 bm = aij->B->rmap->n; 5677 ba = b->a; 5678 } 5679 } else col = in[j]; 5680 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5681 } 5682 } 5683 } else { 5684 if (!aij->donotstash) { 5685 if (roworiented) { 5686 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5687 } else { 5688 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5689 } 5690 } 5691 } 5692 }} 5693 PetscFunctionReturnVoid(); 5694 } 5695 EXTERN_C_END 5696 5697
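/*
   Example usage, a minimal sketch: matsetvaluesmpiaij_() above is a direct-call
   Fortran entry point for MatSetValues() on an MPIAIJ matrix; the equivalent
   assembly from C is simply

      Mat         A;
      PetscInt    row = 0,col = 0;
      PetscScalar v = 1.0;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/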