1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on its way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = A->nz + B->nz; 929 if (!rank) { 930 header[0] = MAT_FILE_COOKIE; 931 header[1] = mat->rmap->N; 932 header[2] = mat->cmap->N; 933 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 936 /* get largest number of rows any processor has */ 937 rlen = mat->rmap->n; 938 range = mat->rmap->range; 939 for (i=1; i<size; i++) { 940 rlen = PetscMax(rlen,range[i+1] - range[i]); 941 } 942 } else { 943 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 944 rlen = mat->rmap->n; 945 } 946 947 /* load up the local row counts */ 948 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 949 for (i=0; i<mat->rmap->n; i++) { 950 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 951 } 952 953 /* store the row lengths to the file */ 954 if (!rank) { 955 MPI_Status status; 956 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 957 for (i=1; i<size; i++) { 958 rlen = range[i+1] - range[i]; 959 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 960 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 961 } 962 } else { 963 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 964 } 965 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 966 967 /* load up the local column indices */ 968 nzmax = nz; /* )th processor needs space a largest processor needs */ 969 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 970 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 971 cnt = 0; 972 for (i=0; i<mat->rmap->n; i++) { 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ( (col = garray[B->j[j]]) > cstart) break; 975 column_indices[cnt++] = col; 976 } 977 for (k=A->i[i]; k<A->i[i+1]; k++) { 978 column_indices[cnt++] = A->j[k] + cstart; 979 } 980 for (; j<B->i[i+1]; j++) { 981 column_indices[cnt++] = garray[B->j[j]]; 982 } 983 } 984 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 985 986 /* store the column indices to the file */ 987 if (!rank) { 988 MPI_Status status; 989 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 990 for (i=1; i<size; i++) { 991 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 992 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 993 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 994 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 995 } 996 } else { 997 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 998 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1001 1002 /* load up the local column values */ 1003 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1004 cnt = 0; 1005 for (i=0; i<mat->rmap->n; i++) { 1006 for (j=B->i[i]; j<B->i[i+1]; j++) { 1007 if ( garray[B->j[j]] > cstart) break; 1008 column_values[cnt++] = B->a[j]; 1009 } 1010 for (k=A->i[i]; k<A->i[i+1]; k++) { 1011 column_values[cnt++] = A->a[k]; 1012 } 1013 for (; j<B->i[i+1]; j++) { 1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead"); 1114 1115 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1116 if (!rank) { 1117 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1118 } else { 1119 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1120 } 1121 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1122 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1123 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1125 1126 /* copy over the A part */ 1127 Aloc = (Mat_SeqAIJ*)aij->A->data; 1128 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1129 row = mat->rmap->rstart; 1130 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1131 for (i=0; i<m; i++) { 1132 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1133 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1134 } 1135 aj = Aloc->j; 1136 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1137 1138 /* copy over the B part */ 1139 Aloc = (Mat_SeqAIJ*)aij->B->data; 1140 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1141 row = mat->rmap->rstart; 1142 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1143 ct = cols; 1144 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1148 } 1149 ierr = PetscFree(ct);CHKERRQ(ierr); 1150 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1151 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1152 /* 1153 Everyone has to call to draw the matrix since the graphics waits are 1154 synchronized across all processors that share the PetscDraw object 1155 */ 1156 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1157 if (!rank) { 1158 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1159 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1160 } 1161 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1162 ierr = MatDestroy(A);CHKERRQ(ierr); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatView_MPIAIJ" 1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscErrorCode ierr; 1172 PetscTruth iascii,isdraw,issocket,isbinary; 1173 1174 PetscFunctionBegin; 1175 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1176 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1177 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1178 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1179 if (iascii || isdraw || isbinary || issocket) { 1180 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRelax_MPIAIJ" 1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscErrorCode ierr; 1193 Vec bb1; 1194 1195 PetscFunctionBegin; 1196 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1197 1198 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1199 if (flag & SOR_ZERO_INITIAL_GUESS) { 1200 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1201 its--; 1202 } 1203 1204 while (its--) { 1205 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 1208 /* update rhs: bb1 = bb - B*x */ 1209 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1211 1212 /* local sweep */ 1213 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1214 } 1215 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1216 if (flag & SOR_ZERO_INITIAL_GUESS) { 1217 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1218 its--; 1219 } 1220 while (its--) { 1221 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 1224 /* update rhs: bb1 = bb - B*x */ 1225 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1227 1228 /* local sweep */ 1229 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1230 } 1231 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1232 if (flag & SOR_ZERO_INITIAL_GUESS) { 1233 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1234 its--; 1235 } 1236 while (its--) { 1237 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* update rhs: bb1 = bb - B*x */ 1241 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1242 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1243 1244 /* local sweep */ 1245 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1246 } 1247 } else { 1248 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1249 } 1250 1251 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatPermute_MPIAIJ" 1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1258 { 1259 MPI_Comm comm,pcomm; 1260 PetscInt first,local_size,nrows,*rows; 1261 int ntids; 1262 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1267 /* make a collective version of 'rowp' */ 1268 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1269 if (pcomm==comm) { 1270 crowp = rowp; 1271 } else { 1272 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1273 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1274 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1275 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1276 } 1277 /* collect the global row permutation and invert it */ 1278 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1279 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1280 if (pcomm!=comm) { 1281 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1282 } 1283 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1284 /* get the local target indices */ 1285 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1286 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1287 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1288 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1289 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1290 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1291 /* the column permutation is so much easier; 1292 make a local version of 'colp' and invert it */ 1293 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1294 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1295 if (ntids==1) { 1296 lcolp = colp; 1297 } else { 1298 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1299 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1300 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1301 } 1302 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1303 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1304 if (ntids>1) { 1305 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1306 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1307 } 1308 /* now we just get the submatrix */ 1309 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1310 /* clean up */ 1311 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1312 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1319 { 1320 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1321 Mat A = mat->A,B = mat->B; 1322 PetscErrorCode ierr; 1323 PetscReal isend[5],irecv[5]; 1324 1325 PetscFunctionBegin; 1326 info->block_size = 1.0; 1327 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1328 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1329 isend[3] = info->memory; isend[4] = info->mallocs; 1330 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1331 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1332 isend[3] += info->memory; isend[4] += info->mallocs; 1333 if (flag == MAT_LOCAL) { 1334 info->nz_used = isend[0]; 1335 info->nz_allocated = isend[1]; 1336 info->nz_unneeded = isend[2]; 1337 info->memory = isend[3]; 1338 info->mallocs = isend[4]; 1339 } else if (flag == MAT_GLOBAL_MAX) { 1340 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1341 info->nz_used = irecv[0]; 1342 info->nz_allocated = irecv[1]; 1343 info->nz_unneeded = irecv[2]; 1344 info->memory = irecv[3]; 1345 info->mallocs = irecv[4]; 1346 } else if (flag == MAT_GLOBAL_SUM) { 1347 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1348 info->nz_used = irecv[0]; 1349 info->nz_allocated = irecv[1]; 1350 info->nz_unneeded = irecv[2]; 1351 info->memory = irecv[3]; 1352 info->mallocs = irecv[4]; 1353 } 1354 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1355 info->fill_ratio_needed = 0; 1356 info->factor_mallocs = 0; 1357 info->rows_global = (double)matin->rmap->N; 1358 info->columns_global = (double)matin->cmap->N; 1359 info->rows_local = (double)matin->rmap->n; 1360 info->columns_local = (double)matin->cmap->N; 1361 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatSetOption_MPIAIJ" 1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1368 { 1369 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 switch (op) { 1374 case MAT_NEW_NONZERO_LOCATIONS: 1375 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1376 case MAT_KEEP_ZEROED_ROWS: 1377 case MAT_NEW_NONZERO_LOCATION_ERR: 1378 case MAT_USE_INODES: 1379 case MAT_IGNORE_ZERO_ENTRIES: 1380 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1381 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1382 break; 1383 case MAT_ROW_ORIENTED: 1384 a->roworiented = flg; 1385 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1386 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1387 break; 1388 case MAT_NEW_DIAGONALS: 1389 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1390 break; 1391 case MAT_IGNORE_OFF_PROC_ENTRIES: 1392 a->donotstash = PETSC_TRUE; 1393 break; 1394 case MAT_SYMMETRIC: 1395 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1396 break; 1397 case MAT_STRUCTURALLY_SYMMETRIC: 1398 case MAT_HERMITIAN: 1399 case MAT_SYMMETRY_ETERNAL: 1400 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1401 break; 1402 default: 1403 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetRow_MPIAIJ" 1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1411 { 1412 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1413 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1414 PetscErrorCode ierr; 1415 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1416 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1417 PetscInt *cmap,*idx_p; 1418 1419 PetscFunctionBegin; 1420 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1421 mat->getrowactive = PETSC_TRUE; 1422 1423 if (!mat->rowvalues && (idx || v)) { 1424 /* 1425 allocate enough space to hold information from the longest row. 1426 */ 1427 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1428 PetscInt max = 1,tmp; 1429 for (i=0; i<matin->rmap->n; i++) { 1430 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1431 if (max < tmp) { max = tmp; } 1432 } 1433 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1434 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1435 } 1436 1437 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1438 lrow = row - rstart; 1439 1440 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1441 if (!v) {pvA = 0; pvB = 0;} 1442 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1443 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1444 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1445 nztot = nzA + nzB; 1446 1447 cmap = mat->garray; 1448 if (v || idx) { 1449 if (nztot) { 1450 /* Sort by increasing column numbers, assuming A and B already sorted */ 1451 PetscInt imark = -1; 1452 if (v) { 1453 *v = v_p = mat->rowvalues; 1454 for (i=0; i<nzB; i++) { 1455 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1456 else break; 1457 } 1458 imark = i; 1459 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1460 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1461 } 1462 if (idx) { 1463 *idx = idx_p = mat->rowindices; 1464 if (imark > -1) { 1465 for (i=0; i<imark; i++) { 1466 idx_p[i] = cmap[cworkB[i]]; 1467 } 1468 } else { 1469 for (i=0; i<nzB; i++) { 1470 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1471 else break; 1472 } 1473 imark = i; 1474 } 1475 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1476 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1477 } 1478 } else { 1479 if (idx) *idx = 0; 1480 if (v) *v = 0; 1481 } 1482 } 1483 *nz = nztot; 1484 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1485 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1492 { 1493 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1494 1495 PetscFunctionBegin; 1496 if (!aij->getrowactive) { 1497 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1498 } 1499 aij->getrowactive = PETSC_FALSE; 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatNorm_MPIAIJ" 1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1506 { 1507 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1508 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1509 PetscErrorCode ierr; 1510 PetscInt i,j,cstart = mat->cmap->rstart; 1511 PetscReal sum = 0.0; 1512 MatScalar *v; 1513 1514 PetscFunctionBegin; 1515 if (aij->size == 1) { 1516 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1517 } else { 1518 if (type == NORM_FROBENIUS) { 1519 v = amat->a; 1520 for (i=0; i<amat->nz; i++) { 1521 #if defined(PETSC_USE_COMPLEX) 1522 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1523 #else 1524 sum += (*v)*(*v); v++; 1525 #endif 1526 } 1527 v = bmat->a; 1528 for (i=0; i<bmat->nz; i++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1531 #else 1532 sum += (*v)*(*v); v++; 1533 #endif 1534 } 1535 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1536 *norm = sqrt(*norm); 1537 } else if (type == NORM_1) { /* max column norm */ 1538 PetscReal *tmp,*tmp2; 1539 PetscInt *jj,*garray = aij->garray; 1540 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1541 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1542 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1543 *norm = 0.0; 1544 v = amat->a; jj = amat->j; 1545 for (j=0; j<amat->nz; j++) { 1546 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1547 } 1548 v = bmat->a; jj = bmat->j; 1549 for (j=0; j<bmat->nz; j++) { 1550 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1551 } 1552 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1553 for (j=0; j<mat->cmap->N; j++) { 1554 if (tmp2[j] > *norm) *norm = tmp2[j]; 1555 } 1556 ierr = PetscFree(tmp);CHKERRQ(ierr); 1557 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1558 } else if (type == NORM_INFINITY) { /* max row norm */ 1559 PetscReal ntemp = 0.0; 1560 for (j=0; j<aij->A->rmap->n; j++) { 1561 v = amat->a + amat->i[j]; 1562 sum = 0.0; 1563 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1564 sum += PetscAbsScalar(*v); v++; 1565 } 1566 v = bmat->a + bmat->i[j]; 1567 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1568 sum += PetscAbsScalar(*v); v++; 1569 } 1570 if (sum > ntemp) ntemp = sum; 1571 } 1572 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1573 } else { 1574 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_MPIAIJ" 1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1583 { 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1585 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1586 PetscErrorCode ierr; 1587 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1588 PetscInt cstart=A->cmap->rstart,ncol; 1589 Mat B; 1590 MatScalar *array; 1591 1592 PetscFunctionBegin; 1593 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 1595 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1596 ai = Aloc->i; aj = Aloc->j; 1597 bi = Bloc->i; bj = Bloc->j; 1598 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1599 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1600 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1601 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1602 for (i=0; i<ai[ma]; i++){ 1603 d_nnz[aj[i]] ++; 1604 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1605 } 1606 1607 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1608 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1609 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1610 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1611 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1612 } else { 1613 B = *matout; 1614 } 1615 1616 /* copy over the A part */ 1617 array = Aloc->a; 1618 row = A->rmap->rstart; 1619 for (i=0; i<ma; i++) { 1620 ncol = ai[i+1]-ai[i]; 1621 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1622 row++; array += ncol; aj += ncol; 1623 } 1624 aj = Aloc->j; 1625 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1626 1627 /* copy over the B part */ 1628 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1629 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1630 array = Bloc->a; 1631 row = A->rmap->rstart; 1632 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1633 cols_tmp = cols; 1634 for (i=0; i<mb; i++) { 1635 ncol = bi[i+1]-bi[i]; 1636 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1637 row++; array += ncol; cols_tmp += ncol; 1638 } 1639 ierr = PetscFree(cols);CHKERRQ(ierr); 1640 1641 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1644 *matout = B; 1645 } else { 1646 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1653 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1654 { 1655 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1656 Mat a = aij->A,b = aij->B; 1657 PetscErrorCode ierr; 1658 PetscInt s1,s2,s3; 1659 1660 PetscFunctionBegin; 1661 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1662 if (rr) { 1663 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1664 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1665 /* Overlap communication with computation. */ 1666 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 } 1668 if (ll) { 1669 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1670 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1671 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1672 } 1673 /* scale the diagonal block */ 1674 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1675 1676 if (rr) { 1677 /* Do a scatter end and then right scale the off-diagonal block */ 1678 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1680 } 1681 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1687 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1694 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1699 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1700 { 1701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1702 PetscErrorCode ierr; 1703 1704 PetscFunctionBegin; 1705 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatEqual_MPIAIJ" 1711 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1712 { 1713 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1714 Mat a,b,c,d; 1715 PetscTruth flg; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 a = matA->A; b = matA->B; 1720 c = matB->A; d = matB->B; 1721 1722 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1723 if (flg) { 1724 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1725 } 1726 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCopy_MPIAIJ" 1732 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1733 { 1734 PetscErrorCode ierr; 1735 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1736 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1737 1738 PetscFunctionBegin; 1739 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1740 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1741 /* because of the column compression in the off-processor part of the matrix a->B, 1742 the number of columns in a->B and b->B may be different, hence we cannot call 1743 the MatCopy() directly on the two parts. If need be, we can provide a more 1744 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1745 then copying the submatrices */ 1746 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1747 } else { 1748 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1749 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1756 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1757 { 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #include "petscblaslapack.h" 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatAXPY_MPIAIJ" 1768 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1769 { 1770 PetscErrorCode ierr; 1771 PetscInt i; 1772 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1773 PetscBLASInt bnz,one=1; 1774 Mat_SeqAIJ *x,*y; 1775 1776 PetscFunctionBegin; 1777 if (str == SAME_NONZERO_PATTERN) { 1778 PetscScalar alpha = a; 1779 x = (Mat_SeqAIJ *)xx->A->data; 1780 y = (Mat_SeqAIJ *)yy->A->data; 1781 bnz = PetscBLASIntCast(x->nz); 1782 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1783 x = (Mat_SeqAIJ *)xx->B->data; 1784 y = (Mat_SeqAIJ *)yy->B->data; 1785 bnz = PetscBLASIntCast(x->nz); 1786 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1787 } else if (str == SUBSET_NONZERO_PATTERN) { 1788 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1789 1790 x = (Mat_SeqAIJ *)xx->B->data; 1791 y = (Mat_SeqAIJ *)yy->B->data; 1792 if (y->xtoy && y->XtoY != xx->B) { 1793 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1794 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1795 } 1796 if (!y->xtoy) { /* get xtoy */ 1797 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1798 y->XtoY = xx->B; 1799 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1800 } 1801 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1802 } else { 1803 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatConjugate_MPIAIJ" 1812 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1813 { 1814 #if defined(PETSC_USE_COMPLEX) 1815 PetscErrorCode ierr; 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1817 1818 PetscFunctionBegin; 1819 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1820 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1821 #else 1822 PetscFunctionBegin; 1823 #endif 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatRealPart_MPIAIJ" 1829 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1830 { 1831 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1832 PetscErrorCode ierr; 1833 1834 PetscFunctionBegin; 1835 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1836 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1842 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1843 { 1844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1849 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #ifdef PETSC_HAVE_PBGL 1854 1855 #include <boost/parallel/mpi/bsp_process_group.hpp> 1856 #include <boost/graph/distributed/ilu_default_graph.hpp> 1857 #include <boost/graph/distributed/ilu_0_block.hpp> 1858 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1859 #include <boost/graph/distributed/petsc/interface.hpp> 1860 #include <boost/multi_array.hpp> 1861 #include <boost/parallel/distributed_property_map->hpp> 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1865 /* 1866 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1867 */ 1868 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, MatFactorInfo *info) 1869 { 1870 namespace petsc = boost::distributed::petsc; 1871 1872 namespace graph_dist = boost::graph::distributed; 1873 using boost::graph::distributed::ilu_default::process_group_type; 1874 using boost::graph::ilu_permuted; 1875 1876 PetscTruth row_identity, col_identity; 1877 PetscContainer c; 1878 PetscInt m, n, M, N; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1883 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1884 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1885 if (!row_identity || !col_identity) { 1886 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1887 } 1888 1889 process_group_type pg; 1890 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1891 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1892 lgraph_type& level_graph = *lgraph_p; 1893 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1894 1895 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1896 ilu_permuted(level_graph); 1897 1898 /* put together the new matrix */ 1899 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1900 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1901 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1902 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1903 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1904 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1905 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1906 1907 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1908 ierr = PetscContainerSetPointer(c, lgraph_p); 1909 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1915 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, MatFactorInfo *info) 1916 { 1917 PetscFunctionBegin; 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatSolve_MPIAIJ" 1923 /* 1924 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1925 */ 1926 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1927 { 1928 namespace graph_dist = boost::graph::distributed; 1929 1930 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1931 lgraph_type* lgraph_p; 1932 PetscContainer c; 1933 PetscErrorCode ierr; 1934 1935 PetscFunctionBegin; 1936 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1937 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1938 ierr = VecCopy(b, x); CHKERRQ(ierr); 1939 1940 PetscScalar* array_x; 1941 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1942 PetscInt sx; 1943 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1944 1945 PetscScalar* array_b; 1946 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1947 PetscInt sb; 1948 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1949 1950 lgraph_type& level_graph = *lgraph_p; 1951 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1952 1953 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1954 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1955 ref_x(array_x, boost::extents[num_vertices(graph)]); 1956 1957 typedef boost::iterator_property_map<array_ref_type::iterator, 1958 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1959 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1960 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1961 1962 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1963 1964 PetscFunctionReturn(0); 1965 } 1966 #endif 1967 1968 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1969 PetscInt nzlocal,nsends,nrecvs; 1970 PetscMPIInt *send_rank; 1971 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1972 PetscScalar *sbuf_a,**rbuf_a; 1973 PetscErrorCode (*MatDestroy)(Mat); 1974 } Mat_Redundant; 1975 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1978 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1979 { 1980 PetscErrorCode ierr; 1981 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1982 PetscInt i; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1986 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1987 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1988 for (i=0; i<redund->nrecvs; i++){ 1989 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1990 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1991 } 1992 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1993 ierr = PetscFree(redund);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatDestroy_MatRedundant" 1999 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2000 { 2001 PetscErrorCode ierr; 2002 PetscContainer container; 2003 Mat_Redundant *redund=PETSC_NULL; 2004 2005 PetscFunctionBegin; 2006 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2007 if (container) { 2008 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2009 } else { 2010 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2011 } 2012 A->ops->destroy = redund->MatDestroy; 2013 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2014 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2015 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2021 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2022 { 2023 PetscMPIInt rank,size; 2024 MPI_Comm comm=((PetscObject)mat)->comm; 2025 PetscErrorCode ierr; 2026 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2027 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2028 PetscInt *rowrange=mat->rmap->range; 2029 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2030 Mat A=aij->A,B=aij->B,C=*matredundant; 2031 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2032 PetscScalar *sbuf_a; 2033 PetscInt nzlocal=a->nz+b->nz; 2034 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2035 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2036 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2037 MatScalar *aworkA,*aworkB; 2038 PetscScalar *vals; 2039 PetscMPIInt tag1,tag2,tag3,imdex; 2040 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2041 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2042 MPI_Status recv_status,*send_status; 2043 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2044 PetscInt **rbuf_j=PETSC_NULL; 2045 PetscScalar **rbuf_a=PETSC_NULL; 2046 Mat_Redundant *redund=PETSC_NULL; 2047 PetscContainer container; 2048 2049 PetscFunctionBegin; 2050 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2051 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2052 2053 if (reuse == MAT_REUSE_MATRIX) { 2054 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2055 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2056 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2057 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2058 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2059 if (container) { 2060 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2061 } else { 2062 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2063 } 2064 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2065 2066 nsends = redund->nsends; 2067 nrecvs = redund->nrecvs; 2068 send_rank = redund->send_rank; recv_rank = send_rank + size; 2069 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2070 sbuf_j = redund->sbuf_j; 2071 sbuf_a = redund->sbuf_a; 2072 rbuf_j = redund->rbuf_j; 2073 rbuf_a = redund->rbuf_a; 2074 } 2075 2076 if (reuse == MAT_INITIAL_MATRIX){ 2077 PetscMPIInt subrank,subsize; 2078 PetscInt nleftover,np_subcomm; 2079 /* get the destination processors' id send_rank, nsends and nrecvs */ 2080 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2081 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2082 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2083 recv_rank = send_rank + size; 2084 np_subcomm = size/nsubcomm; 2085 nleftover = size - nsubcomm*np_subcomm; 2086 nsends = 0; nrecvs = 0; 2087 for (i=0; i<size; i++){ /* i=rank*/ 2088 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2089 send_rank[nsends] = i; nsends++; 2090 recv_rank[nrecvs++] = i; 2091 } 2092 } 2093 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2094 i = size-nleftover-1; 2095 j = 0; 2096 while (j < nsubcomm - nleftover){ 2097 send_rank[nsends++] = i; 2098 i--; j++; 2099 } 2100 } 2101 2102 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2103 for (i=0; i<nleftover; i++){ 2104 recv_rank[nrecvs++] = size-nleftover+i; 2105 } 2106 } 2107 2108 /* allocate sbuf_j, sbuf_a */ 2109 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2110 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2111 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2112 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2113 2114 /* copy mat's local entries into the buffers */ 2115 if (reuse == MAT_INITIAL_MATRIX){ 2116 rownz_max = 0; 2117 rptr = sbuf_j; 2118 cols = sbuf_j + rend-rstart + 1; 2119 vals = sbuf_a; 2120 rptr[0] = 0; 2121 for (i=0; i<rend-rstart; i++){ 2122 row = i + rstart; 2123 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2124 ncols = nzA + nzB; 2125 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2126 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2127 /* load the column indices for this row into cols */ 2128 lwrite = 0; 2129 for (l=0; l<nzB; l++) { 2130 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2131 vals[lwrite] = aworkB[l]; 2132 cols[lwrite++] = ctmp; 2133 } 2134 } 2135 for (l=0; l<nzA; l++){ 2136 vals[lwrite] = aworkA[l]; 2137 cols[lwrite++] = cstart + cworkA[l]; 2138 } 2139 for (l=0; l<nzB; l++) { 2140 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2141 vals[lwrite] = aworkB[l]; 2142 cols[lwrite++] = ctmp; 2143 } 2144 } 2145 vals += ncols; 2146 cols += ncols; 2147 rptr[i+1] = rptr[i] + ncols; 2148 if (rownz_max < ncols) rownz_max = ncols; 2149 } 2150 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2151 } else { /* only copy matrix values into sbuf_a */ 2152 rptr = sbuf_j; 2153 vals = sbuf_a; 2154 rptr[0] = 0; 2155 for (i=0; i<rend-rstart; i++){ 2156 row = i + rstart; 2157 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2158 ncols = nzA + nzB; 2159 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2160 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2161 lwrite = 0; 2162 for (l=0; l<nzB; l++) { 2163 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2164 } 2165 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2166 for (l=0; l<nzB; l++) { 2167 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2168 } 2169 vals += ncols; 2170 rptr[i+1] = rptr[i] + ncols; 2171 } 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* send nzlocal to others, and recv other's nzlocal */ 2175 /*--------------------------------------------------*/ 2176 if (reuse == MAT_INITIAL_MATRIX){ 2177 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2178 s_waits2 = s_waits3 + nsends; 2179 s_waits1 = s_waits2 + nsends; 2180 r_waits1 = s_waits1 + nsends; 2181 r_waits2 = r_waits1 + nrecvs; 2182 r_waits3 = r_waits2 + nrecvs; 2183 } else { 2184 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2185 r_waits3 = s_waits3 + nsends; 2186 } 2187 2188 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2189 if (reuse == MAT_INITIAL_MATRIX){ 2190 /* get new tags to keep the communication clean */ 2191 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2192 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2193 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2194 rbuf_nz = sbuf_nz + nsends; 2195 2196 /* post receives of other's nzlocal */ 2197 for (i=0; i<nrecvs; i++){ 2198 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2199 } 2200 /* send nzlocal to others */ 2201 for (i=0; i<nsends; i++){ 2202 sbuf_nz[i] = nzlocal; 2203 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2204 } 2205 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2206 count = nrecvs; 2207 while (count) { 2208 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2209 recv_rank[imdex] = recv_status.MPI_SOURCE; 2210 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2211 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2212 2213 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2214 rbuf_nz[imdex] += i + 2; 2215 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2216 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2217 count--; 2218 } 2219 /* wait on sends of nzlocal */ 2220 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2221 /* send mat->i,j to others, and recv from other's */ 2222 /*------------------------------------------------*/ 2223 for (i=0; i<nsends; i++){ 2224 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2225 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2226 } 2227 /* wait on receives of mat->i,j */ 2228 /*------------------------------*/ 2229 count = nrecvs; 2230 while (count) { 2231 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2232 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2233 count--; 2234 } 2235 /* wait on sends of mat->i,j */ 2236 /*---------------------------*/ 2237 if (nsends) { 2238 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2239 } 2240 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2241 2242 /* post receives, send and receive mat->a */ 2243 /*----------------------------------------*/ 2244 for (imdex=0; imdex<nrecvs; imdex++) { 2245 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2246 } 2247 for (i=0; i<nsends; i++){ 2248 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2249 } 2250 count = nrecvs; 2251 while (count) { 2252 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2253 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2254 count--; 2255 } 2256 if (nsends) { 2257 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2258 } 2259 2260 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2261 2262 /* create redundant matrix */ 2263 /*-------------------------*/ 2264 if (reuse == MAT_INITIAL_MATRIX){ 2265 /* compute rownz_max for preallocation */ 2266 for (imdex=0; imdex<nrecvs; imdex++){ 2267 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2268 rptr = rbuf_j[imdex]; 2269 for (i=0; i<j; i++){ 2270 ncols = rptr[i+1] - rptr[i]; 2271 if (rownz_max < ncols) rownz_max = ncols; 2272 } 2273 } 2274 2275 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2276 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2277 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2278 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2279 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2280 } else { 2281 C = *matredundant; 2282 } 2283 2284 /* insert local matrix entries */ 2285 rptr = sbuf_j; 2286 cols = sbuf_j + rend-rstart + 1; 2287 vals = sbuf_a; 2288 for (i=0; i<rend-rstart; i++){ 2289 row = i + rstart; 2290 ncols = rptr[i+1] - rptr[i]; 2291 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2292 vals += ncols; 2293 cols += ncols; 2294 } 2295 /* insert received matrix entries */ 2296 for (imdex=0; imdex<nrecvs; imdex++){ 2297 rstart = rowrange[recv_rank[imdex]]; 2298 rend = rowrange[recv_rank[imdex]+1]; 2299 rptr = rbuf_j[imdex]; 2300 cols = rbuf_j[imdex] + rend-rstart + 1; 2301 vals = rbuf_a[imdex]; 2302 for (i=0; i<rend-rstart; i++){ 2303 row = i + rstart; 2304 ncols = rptr[i+1] - rptr[i]; 2305 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2306 vals += ncols; 2307 cols += ncols; 2308 } 2309 } 2310 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2311 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2313 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2314 if (reuse == MAT_INITIAL_MATRIX){ 2315 PetscContainer container; 2316 *matredundant = C; 2317 /* create a supporting struct and attach it to C for reuse */ 2318 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2319 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2320 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2321 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2322 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2323 2324 redund->nzlocal = nzlocal; 2325 redund->nsends = nsends; 2326 redund->nrecvs = nrecvs; 2327 redund->send_rank = send_rank; 2328 redund->sbuf_nz = sbuf_nz; 2329 redund->sbuf_j = sbuf_j; 2330 redund->sbuf_a = sbuf_a; 2331 redund->rbuf_j = rbuf_j; 2332 redund->rbuf_a = rbuf_a; 2333 2334 redund->MatDestroy = C->ops->destroy; 2335 C->ops->destroy = MatDestroy_MatRedundant; 2336 } 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2342 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2343 { 2344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2345 PetscErrorCode ierr; 2346 PetscInt i,*idxb = 0; 2347 PetscScalar *va,*vb; 2348 Vec vtmp; 2349 2350 PetscFunctionBegin; 2351 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2352 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2353 if (idx) { 2354 for (i=0; i<A->cmap->n; i++) { 2355 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2356 } 2357 } 2358 2359 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2360 if (idx) { 2361 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2362 } 2363 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2364 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2365 2366 for (i=0; i<A->rmap->n; i++){ 2367 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2368 va[i] = vb[i]; 2369 if (idx) idx[i] = a->garray[idxb[i]]; 2370 } 2371 } 2372 2373 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2374 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2375 if (idxb) { 2376 ierr = PetscFree(idxb);CHKERRQ(ierr); 2377 } 2378 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2384 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2385 { 2386 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2387 PetscInt n = A->rmap->n; 2388 PetscInt cstart = A->cmap->rstart; 2389 PetscInt *cmap = mat->garray; 2390 PetscInt *diagIdx, *offdiagIdx; 2391 Vec diagV, offdiagV; 2392 PetscScalar *a, *diagA, *offdiagA; 2393 PetscInt r; 2394 PetscErrorCode ierr; 2395 2396 PetscFunctionBegin; 2397 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2398 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2399 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2400 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2401 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2402 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2403 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2404 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2405 for(r = 0; r < n; ++r) { 2406 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2407 a[r] = diagA[r]; 2408 idx[r] = cstart + diagIdx[r]; 2409 } else { 2410 a[r] = offdiagA[r]; 2411 idx[r] = cmap[offdiagIdx[r]]; 2412 } 2413 } 2414 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2415 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2416 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2417 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2418 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2419 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2425 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2426 { 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2431 PetscFunctionReturn(0); 2432 } 2433 2434 /* -------------------------------------------------------------------*/ 2435 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2436 MatGetRow_MPIAIJ, 2437 MatRestoreRow_MPIAIJ, 2438 MatMult_MPIAIJ, 2439 /* 4*/ MatMultAdd_MPIAIJ, 2440 MatMultTranspose_MPIAIJ, 2441 MatMultTransposeAdd_MPIAIJ, 2442 #ifdef PETSC_HAVE_PBGL 2443 MatSolve_MPIAIJ, 2444 #else 2445 0, 2446 #endif 2447 0, 2448 0, 2449 /*10*/ 0, 2450 0, 2451 0, 2452 MatRelax_MPIAIJ, 2453 MatTranspose_MPIAIJ, 2454 /*15*/ MatGetInfo_MPIAIJ, 2455 MatEqual_MPIAIJ, 2456 MatGetDiagonal_MPIAIJ, 2457 MatDiagonalScale_MPIAIJ, 2458 MatNorm_MPIAIJ, 2459 /*20*/ MatAssemblyBegin_MPIAIJ, 2460 MatAssemblyEnd_MPIAIJ, 2461 0, 2462 MatSetOption_MPIAIJ, 2463 MatZeroEntries_MPIAIJ, 2464 /*25*/ MatZeroRows_MPIAIJ, 2465 0, 2466 #ifdef PETSC_HAVE_PBGL 2467 0, 2468 #else 2469 0, 2470 #endif 2471 0, 2472 0, 2473 /*30*/ MatSetUpPreallocation_MPIAIJ, 2474 #ifdef PETSC_HAVE_PBGL 2475 0, 2476 #else 2477 0, 2478 #endif 2479 0, 2480 0, 2481 0, 2482 /*35*/ MatDuplicate_MPIAIJ, 2483 0, 2484 0, 2485 0, 2486 0, 2487 /*40*/ MatAXPY_MPIAIJ, 2488 MatGetSubMatrices_MPIAIJ, 2489 MatIncreaseOverlap_MPIAIJ, 2490 MatGetValues_MPIAIJ, 2491 MatCopy_MPIAIJ, 2492 /*45*/ 0, 2493 MatScale_MPIAIJ, 2494 0, 2495 0, 2496 0, 2497 /*50*/ MatSetBlockSize_MPIAIJ, 2498 0, 2499 0, 2500 0, 2501 0, 2502 /*55*/ MatFDColoringCreate_MPIAIJ, 2503 0, 2504 MatSetUnfactored_MPIAIJ, 2505 MatPermute_MPIAIJ, 2506 0, 2507 /*60*/ MatGetSubMatrix_MPIAIJ, 2508 MatDestroy_MPIAIJ, 2509 MatView_MPIAIJ, 2510 0, 2511 0, 2512 /*65*/ 0, 2513 0, 2514 0, 2515 0, 2516 0, 2517 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2518 0, 2519 MatSetColoring_MPIAIJ, 2520 #if defined(PETSC_HAVE_ADIC) 2521 MatSetValuesAdic_MPIAIJ, 2522 #else 2523 0, 2524 #endif 2525 MatSetValuesAdifor_MPIAIJ, 2526 /*75*/ 0, 2527 0, 2528 0, 2529 0, 2530 0, 2531 /*80*/ 0, 2532 0, 2533 0, 2534 0, 2535 /*84*/ MatLoad_MPIAIJ, 2536 0, 2537 0, 2538 0, 2539 0, 2540 0, 2541 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2542 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2543 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2544 MatPtAP_Basic, 2545 MatPtAPSymbolic_MPIAIJ, 2546 /*95*/ MatPtAPNumeric_MPIAIJ, 2547 0, 2548 0, 2549 0, 2550 0, 2551 /*100*/0, 2552 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2553 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2554 MatConjugate_MPIAIJ, 2555 0, 2556 /*105*/MatSetValuesRow_MPIAIJ, 2557 MatRealPart_MPIAIJ, 2558 MatImaginaryPart_MPIAIJ, 2559 0, 2560 0, 2561 /*110*/0, 2562 MatGetRedundantMatrix_MPIAIJ, 2563 MatGetRowMin_MPIAIJ, 2564 0, 2565 0, 2566 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2567 2568 /* ----------------------------------------------------------------------------------------*/ 2569 2570 EXTERN_C_BEGIN 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2573 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2574 { 2575 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2576 PetscErrorCode ierr; 2577 2578 PetscFunctionBegin; 2579 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2580 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2581 PetscFunctionReturn(0); 2582 } 2583 EXTERN_C_END 2584 2585 EXTERN_C_BEGIN 2586 #undef __FUNCT__ 2587 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2588 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2589 { 2590 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2591 PetscErrorCode ierr; 2592 2593 PetscFunctionBegin; 2594 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2595 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 EXTERN_C_END 2599 2600 #include "petscpc.h" 2601 EXTERN_C_BEGIN 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2604 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2605 { 2606 Mat_MPIAIJ *b; 2607 PetscErrorCode ierr; 2608 PetscInt i; 2609 2610 PetscFunctionBegin; 2611 B->preallocated = PETSC_TRUE; 2612 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2613 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2614 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2615 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2616 2617 B->rmap->bs = B->cmap->bs = 1; 2618 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2619 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2620 if (d_nnz) { 2621 for (i=0; i<B->rmap->n; i++) { 2622 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2623 } 2624 } 2625 if (o_nnz) { 2626 for (i=0; i<B->rmap->n; i++) { 2627 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2628 } 2629 } 2630 b = (Mat_MPIAIJ*)B->data; 2631 2632 /* Explicitly create 2 MATSEQAIJ matrices. */ 2633 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2634 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2635 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2636 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2637 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2638 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2639 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2640 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2641 2642 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2643 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2644 2645 PetscFunctionReturn(0); 2646 } 2647 EXTERN_C_END 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2651 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2652 { 2653 Mat mat; 2654 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2655 PetscErrorCode ierr; 2656 2657 PetscFunctionBegin; 2658 *newmat = 0; 2659 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2660 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2661 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2662 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2663 a = (Mat_MPIAIJ*)mat->data; 2664 2665 mat->factor = matin->factor; 2666 mat->rmap->bs = matin->rmap->bs; 2667 mat->assembled = PETSC_TRUE; 2668 mat->insertmode = NOT_SET_VALUES; 2669 mat->preallocated = PETSC_TRUE; 2670 2671 a->size = oldmat->size; 2672 a->rank = oldmat->rank; 2673 a->donotstash = oldmat->donotstash; 2674 a->roworiented = oldmat->roworiented; 2675 a->rowindices = 0; 2676 a->rowvalues = 0; 2677 a->getrowactive = PETSC_FALSE; 2678 2679 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2680 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2681 2682 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2683 if (oldmat->colmap) { 2684 #if defined (PETSC_USE_CTABLE) 2685 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2686 #else 2687 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2688 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2689 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2690 #endif 2691 } else a->colmap = 0; 2692 if (oldmat->garray) { 2693 PetscInt len; 2694 len = oldmat->B->cmap->n; 2695 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2696 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2697 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2698 } else a->garray = 0; 2699 2700 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2701 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2702 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2703 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2704 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2705 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2706 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2708 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2709 *newmat = mat; 2710 PetscFunctionReturn(0); 2711 } 2712 2713 #include "petscsys.h" 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatLoad_MPIAIJ" 2717 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2718 { 2719 Mat A; 2720 PetscScalar *vals,*svals; 2721 MPI_Comm comm = ((PetscObject)viewer)->comm; 2722 MPI_Status status; 2723 PetscErrorCode ierr; 2724 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2725 PetscInt i,nz,j,rstart,rend,mmax; 2726 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2727 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2728 PetscInt cend,cstart,n,*rowners; 2729 int fd; 2730 2731 PetscFunctionBegin; 2732 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2733 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2734 if (!rank) { 2735 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2736 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2737 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2738 } 2739 2740 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2741 M = header[1]; N = header[2]; 2742 /* determine ownership of all rows */ 2743 m = M/size + ((M % size) > rank); 2744 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2745 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2746 2747 /* First process needs enough room for process with most rows */ 2748 if (!rank) { 2749 mmax = rowners[1]; 2750 for (i=2; i<size; i++) { 2751 mmax = PetscMax(mmax,rowners[i]); 2752 } 2753 } else mmax = m; 2754 2755 rowners[0] = 0; 2756 for (i=2; i<=size; i++) { 2757 rowners[i] += rowners[i-1]; 2758 } 2759 rstart = rowners[rank]; 2760 rend = rowners[rank+1]; 2761 2762 /* distribute row lengths to all processors */ 2763 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2764 if (!rank) { 2765 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2766 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2767 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2768 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2769 for (j=0; j<m; j++) { 2770 procsnz[0] += ourlens[j]; 2771 } 2772 for (i=1; i<size; i++) { 2773 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2774 /* calculate the number of nonzeros on each processor */ 2775 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2776 procsnz[i] += rowlengths[j]; 2777 } 2778 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2779 } 2780 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2781 } else { 2782 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2783 } 2784 2785 if (!rank) { 2786 /* determine max buffer needed and allocate it */ 2787 maxnz = 0; 2788 for (i=0; i<size; i++) { 2789 maxnz = PetscMax(maxnz,procsnz[i]); 2790 } 2791 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2792 2793 /* read in my part of the matrix column indices */ 2794 nz = procsnz[0]; 2795 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2796 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2797 2798 /* read in every one elses and ship off */ 2799 for (i=1; i<size; i++) { 2800 nz = procsnz[i]; 2801 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2802 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2803 } 2804 ierr = PetscFree(cols);CHKERRQ(ierr); 2805 } else { 2806 /* determine buffer space needed for message */ 2807 nz = 0; 2808 for (i=0; i<m; i++) { 2809 nz += ourlens[i]; 2810 } 2811 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2812 2813 /* receive message of column indices*/ 2814 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2815 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2816 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2817 } 2818 2819 /* determine column ownership if matrix is not square */ 2820 if (N != M) { 2821 n = N/size + ((N % size) > rank); 2822 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2823 cstart = cend - n; 2824 } else { 2825 cstart = rstart; 2826 cend = rend; 2827 n = cend - cstart; 2828 } 2829 2830 /* loop over local rows, determining number of off diagonal entries */ 2831 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2832 jj = 0; 2833 for (i=0; i<m; i++) { 2834 for (j=0; j<ourlens[i]; j++) { 2835 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2836 jj++; 2837 } 2838 } 2839 2840 /* create our matrix */ 2841 for (i=0; i<m; i++) { 2842 ourlens[i] -= offlens[i]; 2843 } 2844 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2845 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2846 ierr = MatSetType(A,type);CHKERRQ(ierr); 2847 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2848 2849 for (i=0; i<m; i++) { 2850 ourlens[i] += offlens[i]; 2851 } 2852 2853 if (!rank) { 2854 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2855 2856 /* read in my part of the matrix numerical values */ 2857 nz = procsnz[0]; 2858 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2859 2860 /* insert into matrix */ 2861 jj = rstart; 2862 smycols = mycols; 2863 svals = vals; 2864 for (i=0; i<m; i++) { 2865 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2866 smycols += ourlens[i]; 2867 svals += ourlens[i]; 2868 jj++; 2869 } 2870 2871 /* read in other processors and ship out */ 2872 for (i=1; i<size; i++) { 2873 nz = procsnz[i]; 2874 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2875 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2876 } 2877 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2878 } else { 2879 /* receive numeric values */ 2880 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2881 2882 /* receive message of values*/ 2883 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2884 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2885 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2886 2887 /* insert into matrix */ 2888 jj = rstart; 2889 smycols = mycols; 2890 svals = vals; 2891 for (i=0; i<m; i++) { 2892 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2893 smycols += ourlens[i]; 2894 svals += ourlens[i]; 2895 jj++; 2896 } 2897 } 2898 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2899 ierr = PetscFree(vals);CHKERRQ(ierr); 2900 ierr = PetscFree(mycols);CHKERRQ(ierr); 2901 ierr = PetscFree(rowners);CHKERRQ(ierr); 2902 2903 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 *newmat = A; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 #undef __FUNCT__ 2910 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2911 /* 2912 Not great since it makes two copies of the submatrix, first an SeqAIJ 2913 in local and then by concatenating the local matrices the end result. 2914 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2915 */ 2916 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2917 { 2918 PetscErrorCode ierr; 2919 PetscMPIInt rank,size; 2920 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2921 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2922 Mat *local,M,Mreuse; 2923 MatScalar *vwork,*aa; 2924 MPI_Comm comm = ((PetscObject)mat)->comm; 2925 Mat_SeqAIJ *aij; 2926 2927 2928 PetscFunctionBegin; 2929 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2930 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2931 2932 if (call == MAT_REUSE_MATRIX) { 2933 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2934 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2935 local = &Mreuse; 2936 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2937 } else { 2938 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2939 Mreuse = *local; 2940 ierr = PetscFree(local);CHKERRQ(ierr); 2941 } 2942 2943 /* 2944 m - number of local rows 2945 n - number of columns (same on all processors) 2946 rstart - first row in new global matrix generated 2947 */ 2948 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2949 if (call == MAT_INITIAL_MATRIX) { 2950 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2951 ii = aij->i; 2952 jj = aij->j; 2953 2954 /* 2955 Determine the number of non-zeros in the diagonal and off-diagonal 2956 portions of the matrix in order to do correct preallocation 2957 */ 2958 2959 /* first get start and end of "diagonal" columns */ 2960 if (csize == PETSC_DECIDE) { 2961 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2962 if (mglobal == n) { /* square matrix */ 2963 nlocal = m; 2964 } else { 2965 nlocal = n/size + ((n % size) > rank); 2966 } 2967 } else { 2968 nlocal = csize; 2969 } 2970 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2971 rstart = rend - nlocal; 2972 if (rank == size - 1 && rend != n) { 2973 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2974 } 2975 2976 /* next, compute all the lengths */ 2977 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2978 olens = dlens + m; 2979 for (i=0; i<m; i++) { 2980 jend = ii[i+1] - ii[i]; 2981 olen = 0; 2982 dlen = 0; 2983 for (j=0; j<jend; j++) { 2984 if (*jj < rstart || *jj >= rend) olen++; 2985 else dlen++; 2986 jj++; 2987 } 2988 olens[i] = olen; 2989 dlens[i] = dlen; 2990 } 2991 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2992 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2993 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2994 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2995 ierr = PetscFree(dlens);CHKERRQ(ierr); 2996 } else { 2997 PetscInt ml,nl; 2998 2999 M = *newmat; 3000 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3001 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3002 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3003 /* 3004 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3005 rather than the slower MatSetValues(). 3006 */ 3007 M->was_assembled = PETSC_TRUE; 3008 M->assembled = PETSC_FALSE; 3009 } 3010 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3011 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3012 ii = aij->i; 3013 jj = aij->j; 3014 aa = aij->a; 3015 for (i=0; i<m; i++) { 3016 row = rstart + i; 3017 nz = ii[i+1] - ii[i]; 3018 cwork = jj; jj += nz; 3019 vwork = aa; aa += nz; 3020 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3021 } 3022 3023 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 *newmat = M; 3026 3027 /* save submatrix used in processor for next request */ 3028 if (call == MAT_INITIAL_MATRIX) { 3029 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3030 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3031 } 3032 3033 PetscFunctionReturn(0); 3034 } 3035 3036 EXTERN_C_BEGIN 3037 #undef __FUNCT__ 3038 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3039 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3040 { 3041 PetscInt m,cstart, cend,j,nnz,i,d; 3042 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3043 const PetscInt *JJ; 3044 PetscScalar *values; 3045 PetscErrorCode ierr; 3046 3047 PetscFunctionBegin; 3048 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3049 3050 B->rmap->bs = B->cmap->bs = 1; 3051 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3052 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3053 m = B->rmap->n; 3054 cstart = B->cmap->rstart; 3055 cend = B->cmap->rend; 3056 rstart = B->rmap->rstart; 3057 3058 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3059 o_nnz = d_nnz + m; 3060 3061 #if defined(PETSC_USE_DEBUGGING) 3062 for (i=0; i<m; i++) { 3063 nnz = Ii[i+1]- Ii[i]; 3064 JJ = J + Ii[i]; 3065 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3066 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3067 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3068 for (j=1; j<nnz; j++) { 3069 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3070 } 3071 } 3072 #endif 3073 3074 for (i=0; i<m; i++) { 3075 nnz = Ii[i+1]- Ii[i]; 3076 JJ = J + Ii[i]; 3077 nnz_max = PetscMax(nnz_max,nnz); 3078 for (j=0; j<nnz; j++) { 3079 if (*JJ >= cstart) break; 3080 JJ++; 3081 } 3082 d = 0; 3083 for (; j<nnz; j++) { 3084 if (*JJ++ >= cend) break; 3085 d++; 3086 } 3087 d_nnz[i] = d; 3088 o_nnz[i] = nnz - d; 3089 } 3090 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3091 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3092 3093 if (v) values = (PetscScalar*)v; 3094 else { 3095 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3096 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3097 } 3098 3099 for (i=0; i<m; i++) { 3100 ii = i + rstart; 3101 nnz = Ii[i+1]- Ii[i]; 3102 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3103 } 3104 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3106 3107 if (!v) { 3108 ierr = PetscFree(values);CHKERRQ(ierr); 3109 } 3110 PetscFunctionReturn(0); 3111 } 3112 EXTERN_C_END 3113 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3116 /*@ 3117 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3118 (the default parallel PETSc format). 3119 3120 Collective on MPI_Comm 3121 3122 Input Parameters: 3123 + B - the matrix 3124 . i - the indices into j for the start of each local row (starts with zero) 3125 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3126 - v - optional values in the matrix 3127 3128 Level: developer 3129 3130 Notes: 3131 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3132 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3133 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3134 3135 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3136 3137 The format which is used for the sparse matrix input, is equivalent to a 3138 row-major ordering.. i.e for the following matrix, the input data expected is 3139 as shown: 3140 3141 1 0 0 3142 2 0 3 P0 3143 ------- 3144 4 5 6 P1 3145 3146 Process0 [P0]: rows_owned=[0,1] 3147 i = {0,1,3} [size = nrow+1 = 2+1] 3148 j = {0,0,2} [size = nz = 6] 3149 v = {1,2,3} [size = nz = 6] 3150 3151 Process1 [P1]: rows_owned=[2] 3152 i = {0,3} [size = nrow+1 = 1+1] 3153 j = {0,1,2} [size = nz = 6] 3154 v = {4,5,6} [size = nz = 6] 3155 3156 The column indices for each row MUST be sorted. 3157 3158 .keywords: matrix, aij, compressed row, sparse, parallel 3159 3160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3161 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3162 @*/ 3163 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3164 { 3165 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3166 3167 PetscFunctionBegin; 3168 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3169 if (f) { 3170 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3171 } 3172 PetscFunctionReturn(0); 3173 } 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3177 /*@C 3178 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3179 (the default parallel PETSc format). For good matrix assembly performance 3180 the user should preallocate the matrix storage by setting the parameters 3181 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3182 performance can be increased by more than a factor of 50. 3183 3184 Collective on MPI_Comm 3185 3186 Input Parameters: 3187 + A - the matrix 3188 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3189 (same value is used for all local rows) 3190 . d_nnz - array containing the number of nonzeros in the various rows of the 3191 DIAGONAL portion of the local submatrix (possibly different for each row) 3192 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3193 The size of this array is equal to the number of local rows, i.e 'm'. 3194 You must leave room for the diagonal entry even if it is zero. 3195 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3196 submatrix (same value is used for all local rows). 3197 - o_nnz - array containing the number of nonzeros in the various rows of the 3198 OFF-DIAGONAL portion of the local submatrix (possibly different for 3199 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3200 structure. The size of this array is equal to the number 3201 of local rows, i.e 'm'. 3202 3203 If the *_nnz parameter is given then the *_nz parameter is ignored 3204 3205 The AIJ format (also called the Yale sparse matrix format or 3206 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3207 storage. The stored row and column indices begin with zero. See the users manual for details. 3208 3209 The parallel matrix is partitioned such that the first m0 rows belong to 3210 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3211 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3212 3213 The DIAGONAL portion of the local submatrix of a processor can be defined 3214 as the submatrix which is obtained by extraction the part corresponding 3215 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3216 first row that belongs to the processor, and r2 is the last row belonging 3217 to the this processor. This is a square mxm matrix. The remaining portion 3218 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3219 3220 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3221 3222 You can call MatGetInfo() to get information on how effective the preallocation was; 3223 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3224 You can also run with the option -info and look for messages with the string 3225 malloc in them to see if additional memory allocation was needed. 3226 3227 Example usage: 3228 3229 Consider the following 8x8 matrix with 34 non-zero values, that is 3230 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3231 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3232 as follows: 3233 3234 .vb 3235 1 2 0 | 0 3 0 | 0 4 3236 Proc0 0 5 6 | 7 0 0 | 8 0 3237 9 0 10 | 11 0 0 | 12 0 3238 ------------------------------------- 3239 13 0 14 | 15 16 17 | 0 0 3240 Proc1 0 18 0 | 19 20 21 | 0 0 3241 0 0 0 | 22 23 0 | 24 0 3242 ------------------------------------- 3243 Proc2 25 26 27 | 0 0 28 | 29 0 3244 30 0 0 | 31 32 33 | 0 34 3245 .ve 3246 3247 This can be represented as a collection of submatrices as: 3248 3249 .vb 3250 A B C 3251 D E F 3252 G H I 3253 .ve 3254 3255 Where the submatrices A,B,C are owned by proc0, D,E,F are 3256 owned by proc1, G,H,I are owned by proc2. 3257 3258 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3259 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3260 The 'M','N' parameters are 8,8, and have the same values on all procs. 3261 3262 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3263 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3264 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3265 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3266 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3267 matrix, ans [DF] as another SeqAIJ matrix. 3268 3269 When d_nz, o_nz parameters are specified, d_nz storage elements are 3270 allocated for every row of the local diagonal submatrix, and o_nz 3271 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3272 One way to choose d_nz and o_nz is to use the max nonzerors per local 3273 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3274 In this case, the values of d_nz,o_nz are: 3275 .vb 3276 proc0 : dnz = 2, o_nz = 2 3277 proc1 : dnz = 3, o_nz = 2 3278 proc2 : dnz = 1, o_nz = 4 3279 .ve 3280 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3281 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3282 for proc3. i.e we are using 12+15+10=37 storage locations to store 3283 34 values. 3284 3285 When d_nnz, o_nnz parameters are specified, the storage is specified 3286 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3287 In the above case the values for d_nnz,o_nnz are: 3288 .vb 3289 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3290 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3291 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3292 .ve 3293 Here the space allocated is sum of all the above values i.e 34, and 3294 hence pre-allocation is perfect. 3295 3296 Level: intermediate 3297 3298 .keywords: matrix, aij, compressed row, sparse, parallel 3299 3300 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3301 MPIAIJ, MatGetInfo() 3302 @*/ 3303 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3304 { 3305 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3306 3307 PetscFunctionBegin; 3308 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3309 if (f) { 3310 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3311 } 3312 PetscFunctionReturn(0); 3313 } 3314 3315 #undef __FUNCT__ 3316 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3317 /*@ 3318 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3319 CSR format the local rows. 3320 3321 Collective on MPI_Comm 3322 3323 Input Parameters: 3324 + comm - MPI communicator 3325 . m - number of local rows (Cannot be PETSC_DECIDE) 3326 . n - This value should be the same as the local size used in creating the 3327 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3328 calculated if N is given) For square matrices n is almost always m. 3329 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3330 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3331 . i - row indices 3332 . j - column indices 3333 - a - matrix values 3334 3335 Output Parameter: 3336 . mat - the matrix 3337 3338 Level: intermediate 3339 3340 Notes: 3341 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3342 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3343 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3344 3345 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3346 3347 The format which is used for the sparse matrix input, is equivalent to a 3348 row-major ordering.. i.e for the following matrix, the input data expected is 3349 as shown: 3350 3351 1 0 0 3352 2 0 3 P0 3353 ------- 3354 4 5 6 P1 3355 3356 Process0 [P0]: rows_owned=[0,1] 3357 i = {0,1,3} [size = nrow+1 = 2+1] 3358 j = {0,0,2} [size = nz = 6] 3359 v = {1,2,3} [size = nz = 6] 3360 3361 Process1 [P1]: rows_owned=[2] 3362 i = {0,3} [size = nrow+1 = 1+1] 3363 j = {0,1,2} [size = nz = 6] 3364 v = {4,5,6} [size = nz = 6] 3365 3366 .keywords: matrix, aij, compressed row, sparse, parallel 3367 3368 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3369 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3370 @*/ 3371 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3372 { 3373 PetscErrorCode ierr; 3374 3375 PetscFunctionBegin; 3376 if (i[0]) { 3377 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3378 } 3379 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3380 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3381 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3382 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3383 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 #undef __FUNCT__ 3388 #define __FUNCT__ "MatCreateMPIAIJ" 3389 /*@C 3390 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3391 (the default parallel PETSc format). For good matrix assembly performance 3392 the user should preallocate the matrix storage by setting the parameters 3393 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3394 performance can be increased by more than a factor of 50. 3395 3396 Collective on MPI_Comm 3397 3398 Input Parameters: 3399 + comm - MPI communicator 3400 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3401 This value should be the same as the local size used in creating the 3402 y vector for the matrix-vector product y = Ax. 3403 . n - This value should be the same as the local size used in creating the 3404 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3405 calculated if N is given) For square matrices n is almost always m. 3406 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3407 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3408 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3409 (same value is used for all local rows) 3410 . d_nnz - array containing the number of nonzeros in the various rows of the 3411 DIAGONAL portion of the local submatrix (possibly different for each row) 3412 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3413 The size of this array is equal to the number of local rows, i.e 'm'. 3414 You must leave room for the diagonal entry even if it is zero. 3415 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3416 submatrix (same value is used for all local rows). 3417 - o_nnz - array containing the number of nonzeros in the various rows of the 3418 OFF-DIAGONAL portion of the local submatrix (possibly different for 3419 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3420 structure. The size of this array is equal to the number 3421 of local rows, i.e 'm'. 3422 3423 Output Parameter: 3424 . A - the matrix 3425 3426 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3427 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3428 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3429 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3430 3431 Notes: 3432 If the *_nnz parameter is given then the *_nz parameter is ignored 3433 3434 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3435 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3436 storage requirements for this matrix. 3437 3438 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3439 processor than it must be used on all processors that share the object for 3440 that argument. 3441 3442 The user MUST specify either the local or global matrix dimensions 3443 (possibly both). 3444 3445 The parallel matrix is partitioned across processors such that the 3446 first m0 rows belong to process 0, the next m1 rows belong to 3447 process 1, the next m2 rows belong to process 2 etc.. where 3448 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3449 values corresponding to [m x N] submatrix. 3450 3451 The columns are logically partitioned with the n0 columns belonging 3452 to 0th partition, the next n1 columns belonging to the next 3453 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3454 3455 The DIAGONAL portion of the local submatrix on any given processor 3456 is the submatrix corresponding to the rows and columns m,n 3457 corresponding to the given processor. i.e diagonal matrix on 3458 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3459 etc. The remaining portion of the local submatrix [m x (N-n)] 3460 constitute the OFF-DIAGONAL portion. The example below better 3461 illustrates this concept. 3462 3463 For a square global matrix we define each processor's diagonal portion 3464 to be its local rows and the corresponding columns (a square submatrix); 3465 each processor's off-diagonal portion encompasses the remainder of the 3466 local matrix (a rectangular submatrix). 3467 3468 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3469 3470 When calling this routine with a single process communicator, a matrix of 3471 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3472 type of communicator, use the construction mechanism: 3473 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3474 3475 By default, this format uses inodes (identical nodes) when possible. 3476 We search for consecutive rows with the same nonzero structure, thereby 3477 reusing matrix information to achieve increased efficiency. 3478 3479 Options Database Keys: 3480 + -mat_no_inode - Do not use inodes 3481 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3482 - -mat_aij_oneindex - Internally use indexing starting at 1 3483 rather than 0. Note that when calling MatSetValues(), 3484 the user still MUST index entries starting at 0! 3485 3486 3487 Example usage: 3488 3489 Consider the following 8x8 matrix with 34 non-zero values, that is 3490 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3491 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3492 as follows: 3493 3494 .vb 3495 1 2 0 | 0 3 0 | 0 4 3496 Proc0 0 5 6 | 7 0 0 | 8 0 3497 9 0 10 | 11 0 0 | 12 0 3498 ------------------------------------- 3499 13 0 14 | 15 16 17 | 0 0 3500 Proc1 0 18 0 | 19 20 21 | 0 0 3501 0 0 0 | 22 23 0 | 24 0 3502 ------------------------------------- 3503 Proc2 25 26 27 | 0 0 28 | 29 0 3504 30 0 0 | 31 32 33 | 0 34 3505 .ve 3506 3507 This can be represented as a collection of submatrices as: 3508 3509 .vb 3510 A B C 3511 D E F 3512 G H I 3513 .ve 3514 3515 Where the submatrices A,B,C are owned by proc0, D,E,F are 3516 owned by proc1, G,H,I are owned by proc2. 3517 3518 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3519 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3520 The 'M','N' parameters are 8,8, and have the same values on all procs. 3521 3522 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3523 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3524 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3525 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3526 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3527 matrix, ans [DF] as another SeqAIJ matrix. 3528 3529 When d_nz, o_nz parameters are specified, d_nz storage elements are 3530 allocated for every row of the local diagonal submatrix, and o_nz 3531 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3532 One way to choose d_nz and o_nz is to use the max nonzerors per local 3533 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3534 In this case, the values of d_nz,o_nz are: 3535 .vb 3536 proc0 : dnz = 2, o_nz = 2 3537 proc1 : dnz = 3, o_nz = 2 3538 proc2 : dnz = 1, o_nz = 4 3539 .ve 3540 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3541 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3542 for proc3. i.e we are using 12+15+10=37 storage locations to store 3543 34 values. 3544 3545 When d_nnz, o_nnz parameters are specified, the storage is specified 3546 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3547 In the above case the values for d_nnz,o_nnz are: 3548 .vb 3549 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3550 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3551 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3552 .ve 3553 Here the space allocated is sum of all the above values i.e 34, and 3554 hence pre-allocation is perfect. 3555 3556 Level: intermediate 3557 3558 .keywords: matrix, aij, compressed row, sparse, parallel 3559 3560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3561 MPIAIJ, MatCreateMPIAIJWithArrays() 3562 @*/ 3563 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3564 { 3565 PetscErrorCode ierr; 3566 PetscMPIInt size; 3567 3568 PetscFunctionBegin; 3569 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3570 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3571 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3572 if (size > 1) { 3573 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3574 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3575 } else { 3576 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3577 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3578 } 3579 PetscFunctionReturn(0); 3580 } 3581 3582 #undef __FUNCT__ 3583 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3584 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3585 { 3586 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3587 3588 PetscFunctionBegin; 3589 *Ad = a->A; 3590 *Ao = a->B; 3591 *colmap = a->garray; 3592 PetscFunctionReturn(0); 3593 } 3594 3595 #undef __FUNCT__ 3596 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3597 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3598 { 3599 PetscErrorCode ierr; 3600 PetscInt i; 3601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3602 3603 PetscFunctionBegin; 3604 if (coloring->ctype == IS_COLORING_GLOBAL) { 3605 ISColoringValue *allcolors,*colors; 3606 ISColoring ocoloring; 3607 3608 /* set coloring for diagonal portion */ 3609 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3610 3611 /* set coloring for off-diagonal portion */ 3612 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3613 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3614 for (i=0; i<a->B->cmap->n; i++) { 3615 colors[i] = allcolors[a->garray[i]]; 3616 } 3617 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3618 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3619 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3620 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3621 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3622 ISColoringValue *colors; 3623 PetscInt *larray; 3624 ISColoring ocoloring; 3625 3626 /* set coloring for diagonal portion */ 3627 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3628 for (i=0; i<a->A->cmap->n; i++) { 3629 larray[i] = i + A->cmap->rstart; 3630 } 3631 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3632 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3633 for (i=0; i<a->A->cmap->n; i++) { 3634 colors[i] = coloring->colors[larray[i]]; 3635 } 3636 ierr = PetscFree(larray);CHKERRQ(ierr); 3637 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3638 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3639 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3640 3641 /* set coloring for off-diagonal portion */ 3642 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3643 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3644 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3645 for (i=0; i<a->B->cmap->n; i++) { 3646 colors[i] = coloring->colors[larray[i]]; 3647 } 3648 ierr = PetscFree(larray);CHKERRQ(ierr); 3649 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3650 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3651 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3652 } else { 3653 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3654 } 3655 3656 PetscFunctionReturn(0); 3657 } 3658 3659 #if defined(PETSC_HAVE_ADIC) 3660 #undef __FUNCT__ 3661 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3662 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3663 { 3664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3665 PetscErrorCode ierr; 3666 3667 PetscFunctionBegin; 3668 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3669 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3670 PetscFunctionReturn(0); 3671 } 3672 #endif 3673 3674 #undef __FUNCT__ 3675 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3676 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3677 { 3678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3679 PetscErrorCode ierr; 3680 3681 PetscFunctionBegin; 3682 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3683 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3684 PetscFunctionReturn(0); 3685 } 3686 3687 #undef __FUNCT__ 3688 #define __FUNCT__ "MatMerge" 3689 /*@ 3690 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3691 matrices from each processor 3692 3693 Collective on MPI_Comm 3694 3695 Input Parameters: 3696 + comm - the communicators the parallel matrix will live on 3697 . inmat - the input sequential matrices 3698 . n - number of local columns (or PETSC_DECIDE) 3699 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3700 3701 Output Parameter: 3702 . outmat - the parallel matrix generated 3703 3704 Level: advanced 3705 3706 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3707 3708 @*/ 3709 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3710 { 3711 PetscErrorCode ierr; 3712 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3713 PetscInt *indx; 3714 PetscScalar *values; 3715 3716 PetscFunctionBegin; 3717 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3718 if (scall == MAT_INITIAL_MATRIX){ 3719 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3720 if (n == PETSC_DECIDE){ 3721 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3722 } 3723 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3724 rstart -= m; 3725 3726 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3727 for (i=0;i<m;i++) { 3728 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3729 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3730 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3731 } 3732 /* This routine will ONLY return MPIAIJ type matrix */ 3733 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3734 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3735 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3736 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3737 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3738 3739 } else if (scall == MAT_REUSE_MATRIX){ 3740 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3741 } else { 3742 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3743 } 3744 3745 for (i=0;i<m;i++) { 3746 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3747 Ii = i + rstart; 3748 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3749 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3750 } 3751 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3752 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3753 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 3755 PetscFunctionReturn(0); 3756 } 3757 3758 #undef __FUNCT__ 3759 #define __FUNCT__ "MatFileSplit" 3760 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3761 { 3762 PetscErrorCode ierr; 3763 PetscMPIInt rank; 3764 PetscInt m,N,i,rstart,nnz; 3765 size_t len; 3766 const PetscInt *indx; 3767 PetscViewer out; 3768 char *name; 3769 Mat B; 3770 const PetscScalar *values; 3771 3772 PetscFunctionBegin; 3773 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3774 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3775 /* Should this be the type of the diagonal block of A? */ 3776 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3777 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3778 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3779 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3780 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3781 for (i=0;i<m;i++) { 3782 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3783 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3784 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3785 } 3786 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3787 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3788 3789 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3790 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3791 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3792 sprintf(name,"%s.%d",outfile,rank); 3793 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3794 ierr = PetscFree(name); 3795 ierr = MatView(B,out);CHKERRQ(ierr); 3796 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3797 ierr = MatDestroy(B);CHKERRQ(ierr); 3798 PetscFunctionReturn(0); 3799 } 3800 3801 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3802 #undef __FUNCT__ 3803 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3804 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3805 { 3806 PetscErrorCode ierr; 3807 Mat_Merge_SeqsToMPI *merge; 3808 PetscContainer container; 3809 3810 PetscFunctionBegin; 3811 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3812 if (container) { 3813 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3814 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3815 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3816 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3817 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3818 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3819 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3820 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3821 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3822 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3823 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3824 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3825 3826 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3827 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3828 } 3829 ierr = PetscFree(merge);CHKERRQ(ierr); 3830 3831 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3832 PetscFunctionReturn(0); 3833 } 3834 3835 #include "src/mat/utils/freespace.h" 3836 #include "petscbt.h" 3837 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3840 /*@C 3841 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3842 matrices from each processor 3843 3844 Collective on MPI_Comm 3845 3846 Input Parameters: 3847 + comm - the communicators the parallel matrix will live on 3848 . seqmat - the input sequential matrices 3849 . m - number of local rows (or PETSC_DECIDE) 3850 . n - number of local columns (or PETSC_DECIDE) 3851 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3852 3853 Output Parameter: 3854 . mpimat - the parallel matrix generated 3855 3856 Level: advanced 3857 3858 Notes: 3859 The dimensions of the sequential matrix in each processor MUST be the same. 3860 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3861 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3862 @*/ 3863 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3864 { 3865 PetscErrorCode ierr; 3866 MPI_Comm comm=((PetscObject)mpimat)->comm; 3867 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3868 PetscMPIInt size,rank,taga,*len_s; 3869 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 3870 PetscInt proc,m; 3871 PetscInt **buf_ri,**buf_rj; 3872 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3873 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3874 MPI_Request *s_waits,*r_waits; 3875 MPI_Status *status; 3876 MatScalar *aa=a->a; 3877 MatScalar **abuf_r,*ba_i; 3878 Mat_Merge_SeqsToMPI *merge; 3879 PetscContainer container; 3880 3881 PetscFunctionBegin; 3882 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3883 3884 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3885 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3886 3887 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3888 if (container) { 3889 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3890 } 3891 bi = merge->bi; 3892 bj = merge->bj; 3893 buf_ri = merge->buf_ri; 3894 buf_rj = merge->buf_rj; 3895 3896 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3897 owners = merge->rowmap.range; 3898 len_s = merge->len_s; 3899 3900 /* send and recv matrix values */ 3901 /*-----------------------------*/ 3902 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3903 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3904 3905 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3906 for (proc=0,k=0; proc<size; proc++){ 3907 if (!len_s[proc]) continue; 3908 i = owners[proc]; 3909 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3910 k++; 3911 } 3912 3913 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3914 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3915 ierr = PetscFree(status);CHKERRQ(ierr); 3916 3917 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3918 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3919 3920 /* insert mat values of mpimat */ 3921 /*----------------------------*/ 3922 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 3923 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3924 nextrow = buf_ri_k + merge->nrecv; 3925 nextai = nextrow + merge->nrecv; 3926 3927 for (k=0; k<merge->nrecv; k++){ 3928 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3929 nrows = *(buf_ri_k[k]); 3930 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3931 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3932 } 3933 3934 /* set values of ba */ 3935 m = merge->rowmap.n; 3936 for (i=0; i<m; i++) { 3937 arow = owners[rank] + i; 3938 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3939 bnzi = bi[i+1] - bi[i]; 3940 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3941 3942 /* add local non-zero vals of this proc's seqmat into ba */ 3943 anzi = ai[arow+1] - ai[arow]; 3944 aj = a->j + ai[arow]; 3945 aa = a->a + ai[arow]; 3946 nextaj = 0; 3947 for (j=0; nextaj<anzi; j++){ 3948 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3949 ba_i[j] += aa[nextaj++]; 3950 } 3951 } 3952 3953 /* add received vals into ba */ 3954 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3955 /* i-th row */ 3956 if (i == *nextrow[k]) { 3957 anzi = *(nextai[k]+1) - *nextai[k]; 3958 aj = buf_rj[k] + *(nextai[k]); 3959 aa = abuf_r[k] + *(nextai[k]); 3960 nextaj = 0; 3961 for (j=0; nextaj<anzi; j++){ 3962 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3963 ba_i[j] += aa[nextaj++]; 3964 } 3965 } 3966 nextrow[k]++; nextai[k]++; 3967 } 3968 } 3969 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3970 } 3971 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3972 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3973 3974 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3975 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3976 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3977 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3978 PetscFunctionReturn(0); 3979 } 3980 3981 #undef __FUNCT__ 3982 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3983 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3984 { 3985 PetscErrorCode ierr; 3986 Mat B_mpi; 3987 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3988 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3989 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3990 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 3991 PetscInt len,proc,*dnz,*onz; 3992 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3993 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3994 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3995 MPI_Status *status; 3996 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3997 PetscBT lnkbt; 3998 Mat_Merge_SeqsToMPI *merge; 3999 PetscContainer container; 4000 4001 PetscFunctionBegin; 4002 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4003 4004 /* make sure it is a PETSc comm */ 4005 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4006 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4007 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4008 4009 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4010 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4011 4012 /* determine row ownership */ 4013 /*---------------------------------------------------------*/ 4014 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4015 merge->rowmap.n = m; 4016 merge->rowmap.N = M; 4017 merge->rowmap.bs = 1; 4018 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4019 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4020 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4021 4022 m = merge->rowmap.n; 4023 M = merge->rowmap.N; 4024 owners = merge->rowmap.range; 4025 4026 /* determine the number of messages to send, their lengths */ 4027 /*---------------------------------------------------------*/ 4028 len_s = merge->len_s; 4029 4030 len = 0; /* length of buf_si[] */ 4031 merge->nsend = 0; 4032 for (proc=0; proc<size; proc++){ 4033 len_si[proc] = 0; 4034 if (proc == rank){ 4035 len_s[proc] = 0; 4036 } else { 4037 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4038 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4039 } 4040 if (len_s[proc]) { 4041 merge->nsend++; 4042 nrows = 0; 4043 for (i=owners[proc]; i<owners[proc+1]; i++){ 4044 if (ai[i+1] > ai[i]) nrows++; 4045 } 4046 len_si[proc] = 2*(nrows+1); 4047 len += len_si[proc]; 4048 } 4049 } 4050 4051 /* determine the number and length of messages to receive for ij-structure */ 4052 /*-------------------------------------------------------------------------*/ 4053 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4054 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4055 4056 /* post the Irecv of j-structure */ 4057 /*-------------------------------*/ 4058 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4059 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4060 4061 /* post the Isend of j-structure */ 4062 /*--------------------------------*/ 4063 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4064 sj_waits = si_waits + merge->nsend; 4065 4066 for (proc=0, k=0; proc<size; proc++){ 4067 if (!len_s[proc]) continue; 4068 i = owners[proc]; 4069 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4070 k++; 4071 } 4072 4073 /* receives and sends of j-structure are complete */ 4074 /*------------------------------------------------*/ 4075 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4076 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4077 4078 /* send and recv i-structure */ 4079 /*---------------------------*/ 4080 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4081 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4082 4083 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4084 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4085 for (proc=0,k=0; proc<size; proc++){ 4086 if (!len_s[proc]) continue; 4087 /* form outgoing message for i-structure: 4088 buf_si[0]: nrows to be sent 4089 [1:nrows]: row index (global) 4090 [nrows+1:2*nrows+1]: i-structure index 4091 */ 4092 /*-------------------------------------------*/ 4093 nrows = len_si[proc]/2 - 1; 4094 buf_si_i = buf_si + nrows+1; 4095 buf_si[0] = nrows; 4096 buf_si_i[0] = 0; 4097 nrows = 0; 4098 for (i=owners[proc]; i<owners[proc+1]; i++){ 4099 anzi = ai[i+1] - ai[i]; 4100 if (anzi) { 4101 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4102 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4103 nrows++; 4104 } 4105 } 4106 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4107 k++; 4108 buf_si += len_si[proc]; 4109 } 4110 4111 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4112 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4113 4114 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4115 for (i=0; i<merge->nrecv; i++){ 4116 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4117 } 4118 4119 ierr = PetscFree(len_si);CHKERRQ(ierr); 4120 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4121 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4122 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4123 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4124 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4125 ierr = PetscFree(status);CHKERRQ(ierr); 4126 4127 /* compute a local seq matrix in each processor */ 4128 /*----------------------------------------------*/ 4129 /* allocate bi array and free space for accumulating nonzero column info */ 4130 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4131 bi[0] = 0; 4132 4133 /* create and initialize a linked list */ 4134 nlnk = N+1; 4135 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4136 4137 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4138 len = 0; 4139 len = ai[owners[rank+1]] - ai[owners[rank]]; 4140 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4141 current_space = free_space; 4142 4143 /* determine symbolic info for each local row */ 4144 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4145 nextrow = buf_ri_k + merge->nrecv; 4146 nextai = nextrow + merge->nrecv; 4147 for (k=0; k<merge->nrecv; k++){ 4148 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4149 nrows = *buf_ri_k[k]; 4150 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4151 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4152 } 4153 4154 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4155 len = 0; 4156 for (i=0;i<m;i++) { 4157 bnzi = 0; 4158 /* add local non-zero cols of this proc's seqmat into lnk */ 4159 arow = owners[rank] + i; 4160 anzi = ai[arow+1] - ai[arow]; 4161 aj = a->j + ai[arow]; 4162 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4163 bnzi += nlnk; 4164 /* add received col data into lnk */ 4165 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4166 if (i == *nextrow[k]) { /* i-th row */ 4167 anzi = *(nextai[k]+1) - *nextai[k]; 4168 aj = buf_rj[k] + *nextai[k]; 4169 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4170 bnzi += nlnk; 4171 nextrow[k]++; nextai[k]++; 4172 } 4173 } 4174 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4175 4176 /* if free space is not available, make more free space */ 4177 if (current_space->local_remaining<bnzi) { 4178 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4179 nspacedouble++; 4180 } 4181 /* copy data into free space, then initialize lnk */ 4182 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4183 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4184 4185 current_space->array += bnzi; 4186 current_space->local_used += bnzi; 4187 current_space->local_remaining -= bnzi; 4188 4189 bi[i+1] = bi[i] + bnzi; 4190 } 4191 4192 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4193 4194 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4195 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4196 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4197 4198 /* create symbolic parallel matrix B_mpi */ 4199 /*---------------------------------------*/ 4200 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4201 if (n==PETSC_DECIDE) { 4202 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4203 } else { 4204 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4205 } 4206 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4207 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4208 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4209 4210 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4211 B_mpi->assembled = PETSC_FALSE; 4212 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4213 merge->bi = bi; 4214 merge->bj = bj; 4215 merge->buf_ri = buf_ri; 4216 merge->buf_rj = buf_rj; 4217 merge->coi = PETSC_NULL; 4218 merge->coj = PETSC_NULL; 4219 merge->owners_co = PETSC_NULL; 4220 4221 /* attach the supporting struct to B_mpi for reuse */ 4222 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4223 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4224 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4225 *mpimat = B_mpi; 4226 4227 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4228 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4229 PetscFunctionReturn(0); 4230 } 4231 4232 #undef __FUNCT__ 4233 #define __FUNCT__ "MatMerge_SeqsToMPI" 4234 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4235 { 4236 PetscErrorCode ierr; 4237 4238 PetscFunctionBegin; 4239 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4240 if (scall == MAT_INITIAL_MATRIX){ 4241 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4242 } 4243 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4244 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4245 PetscFunctionReturn(0); 4246 } 4247 4248 #undef __FUNCT__ 4249 #define __FUNCT__ "MatGetLocalMat" 4250 /*@ 4251 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4252 4253 Not Collective 4254 4255 Input Parameters: 4256 + A - the matrix 4257 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4258 4259 Output Parameter: 4260 . A_loc - the local sequential matrix generated 4261 4262 Level: developer 4263 4264 @*/ 4265 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4266 { 4267 PetscErrorCode ierr; 4268 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4269 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4270 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4271 MatScalar *aa=a->a,*ba=b->a,*cam; 4272 PetscScalar *ca; 4273 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4274 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4275 4276 PetscFunctionBegin; 4277 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4278 if (scall == MAT_INITIAL_MATRIX){ 4279 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4280 ci[0] = 0; 4281 for (i=0; i<am; i++){ 4282 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4283 } 4284 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4285 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4286 k = 0; 4287 for (i=0; i<am; i++) { 4288 ncols_o = bi[i+1] - bi[i]; 4289 ncols_d = ai[i+1] - ai[i]; 4290 /* off-diagonal portion of A */ 4291 for (jo=0; jo<ncols_o; jo++) { 4292 col = cmap[*bj]; 4293 if (col >= cstart) break; 4294 cj[k] = col; bj++; 4295 ca[k++] = *ba++; 4296 } 4297 /* diagonal portion of A */ 4298 for (j=0; j<ncols_d; j++) { 4299 cj[k] = cstart + *aj++; 4300 ca[k++] = *aa++; 4301 } 4302 /* off-diagonal portion of A */ 4303 for (j=jo; j<ncols_o; j++) { 4304 cj[k] = cmap[*bj++]; 4305 ca[k++] = *ba++; 4306 } 4307 } 4308 /* put together the new matrix */ 4309 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4310 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4311 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4312 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4313 mat->free_a = PETSC_TRUE; 4314 mat->free_ij = PETSC_TRUE; 4315 mat->nonew = 0; 4316 } else if (scall == MAT_REUSE_MATRIX){ 4317 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4318 ci = mat->i; cj = mat->j; cam = mat->a; 4319 for (i=0; i<am; i++) { 4320 /* off-diagonal portion of A */ 4321 ncols_o = bi[i+1] - bi[i]; 4322 for (jo=0; jo<ncols_o; jo++) { 4323 col = cmap[*bj]; 4324 if (col >= cstart) break; 4325 *cam++ = *ba++; bj++; 4326 } 4327 /* diagonal portion of A */ 4328 ncols_d = ai[i+1] - ai[i]; 4329 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4330 /* off-diagonal portion of A */ 4331 for (j=jo; j<ncols_o; j++) { 4332 *cam++ = *ba++; bj++; 4333 } 4334 } 4335 } else { 4336 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4337 } 4338 4339 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4340 PetscFunctionReturn(0); 4341 } 4342 4343 #undef __FUNCT__ 4344 #define __FUNCT__ "MatGetLocalMatCondensed" 4345 /*@C 4346 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4347 4348 Not Collective 4349 4350 Input Parameters: 4351 + A - the matrix 4352 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4353 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4354 4355 Output Parameter: 4356 . A_loc - the local sequential matrix generated 4357 4358 Level: developer 4359 4360 @*/ 4361 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4362 { 4363 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4364 PetscErrorCode ierr; 4365 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4366 IS isrowa,iscola; 4367 Mat *aloc; 4368 4369 PetscFunctionBegin; 4370 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4371 if (!row){ 4372 start = A->rmap->rstart; end = A->rmap->rend; 4373 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4374 } else { 4375 isrowa = *row; 4376 } 4377 if (!col){ 4378 start = A->cmap->rstart; 4379 cmap = a->garray; 4380 nzA = a->A->cmap->n; 4381 nzB = a->B->cmap->n; 4382 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4383 ncols = 0; 4384 for (i=0; i<nzB; i++) { 4385 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4386 else break; 4387 } 4388 imark = i; 4389 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4390 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4391 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4392 ierr = PetscFree(idx);CHKERRQ(ierr); 4393 } else { 4394 iscola = *col; 4395 } 4396 if (scall != MAT_INITIAL_MATRIX){ 4397 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4398 aloc[0] = *A_loc; 4399 } 4400 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4401 *A_loc = aloc[0]; 4402 ierr = PetscFree(aloc);CHKERRQ(ierr); 4403 if (!row){ 4404 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4405 } 4406 if (!col){ 4407 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4408 } 4409 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 #undef __FUNCT__ 4414 #define __FUNCT__ "MatGetBrowsOfAcols" 4415 /*@C 4416 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4417 4418 Collective on Mat 4419 4420 Input Parameters: 4421 + A,B - the matrices in mpiaij format 4422 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4423 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4424 4425 Output Parameter: 4426 + rowb, colb - index sets of rows and columns of B to extract 4427 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4428 - B_seq - the sequential matrix generated 4429 4430 Level: developer 4431 4432 @*/ 4433 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4434 { 4435 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4436 PetscErrorCode ierr; 4437 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4438 IS isrowb,iscolb; 4439 Mat *bseq; 4440 4441 PetscFunctionBegin; 4442 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4443 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4444 } 4445 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4446 4447 if (scall == MAT_INITIAL_MATRIX){ 4448 start = A->cmap->rstart; 4449 cmap = a->garray; 4450 nzA = a->A->cmap->n; 4451 nzB = a->B->cmap->n; 4452 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4453 ncols = 0; 4454 for (i=0; i<nzB; i++) { /* row < local row index */ 4455 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4456 else break; 4457 } 4458 imark = i; 4459 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4460 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4461 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4462 ierr = PetscFree(idx);CHKERRQ(ierr); 4463 *brstart = imark; 4464 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4465 } else { 4466 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4467 isrowb = *rowb; iscolb = *colb; 4468 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4469 bseq[0] = *B_seq; 4470 } 4471 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4472 *B_seq = bseq[0]; 4473 ierr = PetscFree(bseq);CHKERRQ(ierr); 4474 if (!rowb){ 4475 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4476 } else { 4477 *rowb = isrowb; 4478 } 4479 if (!colb){ 4480 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4481 } else { 4482 *colb = iscolb; 4483 } 4484 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4485 PetscFunctionReturn(0); 4486 } 4487 4488 #undef __FUNCT__ 4489 #define __FUNCT__ "MatGetBrowsOfAoCols" 4490 /*@C 4491 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4492 of the OFF-DIAGONAL portion of local A 4493 4494 Collective on Mat 4495 4496 Input Parameters: 4497 + A,B - the matrices in mpiaij format 4498 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4499 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4500 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4501 4502 Output Parameter: 4503 + B_oth - the sequential matrix generated 4504 4505 Level: developer 4506 4507 @*/ 4508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4509 { 4510 VecScatter_MPI_General *gen_to,*gen_from; 4511 PetscErrorCode ierr; 4512 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4513 Mat_SeqAIJ *b_oth; 4514 VecScatter ctx=a->Mvctx; 4515 MPI_Comm comm=((PetscObject)ctx)->comm; 4516 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4517 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4518 PetscScalar *rvalues,*svalues; 4519 MatScalar *b_otha,*bufa,*bufA; 4520 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4521 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4522 MPI_Status *sstatus,rstatus; 4523 PetscMPIInt jj; 4524 PetscInt *cols,sbs,rbs; 4525 PetscScalar *vals; 4526 4527 PetscFunctionBegin; 4528 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4529 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4530 } 4531 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4532 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4533 4534 gen_to = (VecScatter_MPI_General*)ctx->todata; 4535 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4536 rvalues = gen_from->values; /* holds the length of receiving row */ 4537 svalues = gen_to->values; /* holds the length of sending row */ 4538 nrecvs = gen_from->n; 4539 nsends = gen_to->n; 4540 4541 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4542 srow = gen_to->indices; /* local row index to be sent */ 4543 sstarts = gen_to->starts; 4544 sprocs = gen_to->procs; 4545 sstatus = gen_to->sstatus; 4546 sbs = gen_to->bs; 4547 rstarts = gen_from->starts; 4548 rprocs = gen_from->procs; 4549 rbs = gen_from->bs; 4550 4551 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4552 if (scall == MAT_INITIAL_MATRIX){ 4553 /* i-array */ 4554 /*---------*/ 4555 /* post receives */ 4556 for (i=0; i<nrecvs; i++){ 4557 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4558 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4559 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4560 } 4561 4562 /* pack the outgoing message */ 4563 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4564 rstartsj = sstartsj + nsends +1; 4565 sstartsj[0] = 0; rstartsj[0] = 0; 4566 len = 0; /* total length of j or a array to be sent */ 4567 k = 0; 4568 for (i=0; i<nsends; i++){ 4569 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4570 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4571 for (j=0; j<nrows; j++) { 4572 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4573 for (l=0; l<sbs; l++){ 4574 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4575 rowlen[j*sbs+l] = ncols; 4576 len += ncols; 4577 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4578 } 4579 k++; 4580 } 4581 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4582 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4583 } 4584 /* recvs and sends of i-array are completed */ 4585 i = nrecvs; 4586 while (i--) { 4587 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4588 } 4589 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4590 4591 /* allocate buffers for sending j and a arrays */ 4592 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4593 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4594 4595 /* create i-array of B_oth */ 4596 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4597 b_othi[0] = 0; 4598 len = 0; /* total length of j or a array to be received */ 4599 k = 0; 4600 for (i=0; i<nrecvs; i++){ 4601 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4602 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4603 for (j=0; j<nrows; j++) { 4604 b_othi[k+1] = b_othi[k] + rowlen[j]; 4605 len += rowlen[j]; k++; 4606 } 4607 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4608 } 4609 4610 /* allocate space for j and a arrrays of B_oth */ 4611 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4612 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4613 4614 /* j-array */ 4615 /*---------*/ 4616 /* post receives of j-array */ 4617 for (i=0; i<nrecvs; i++){ 4618 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4619 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4620 } 4621 4622 /* pack the outgoing message j-array */ 4623 k = 0; 4624 for (i=0; i<nsends; i++){ 4625 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4626 bufJ = bufj+sstartsj[i]; 4627 for (j=0; j<nrows; j++) { 4628 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4629 for (ll=0; ll<sbs; ll++){ 4630 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4631 for (l=0; l<ncols; l++){ 4632 *bufJ++ = cols[l]; 4633 } 4634 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4635 } 4636 } 4637 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4638 } 4639 4640 /* recvs and sends of j-array are completed */ 4641 i = nrecvs; 4642 while (i--) { 4643 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4644 } 4645 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4646 } else if (scall == MAT_REUSE_MATRIX){ 4647 sstartsj = *startsj; 4648 rstartsj = sstartsj + nsends +1; 4649 bufa = *bufa_ptr; 4650 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4651 b_otha = b_oth->a; 4652 } else { 4653 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4654 } 4655 4656 /* a-array */ 4657 /*---------*/ 4658 /* post receives of a-array */ 4659 for (i=0; i<nrecvs; i++){ 4660 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4661 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4662 } 4663 4664 /* pack the outgoing message a-array */ 4665 k = 0; 4666 for (i=0; i<nsends; i++){ 4667 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4668 bufA = bufa+sstartsj[i]; 4669 for (j=0; j<nrows; j++) { 4670 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4671 for (ll=0; ll<sbs; ll++){ 4672 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4673 for (l=0; l<ncols; l++){ 4674 *bufA++ = vals[l]; 4675 } 4676 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4677 } 4678 } 4679 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4680 } 4681 /* recvs and sends of a-array are completed */ 4682 i = nrecvs; 4683 while (i--) { 4684 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4685 } 4686 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4687 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4688 4689 if (scall == MAT_INITIAL_MATRIX){ 4690 /* put together the new matrix */ 4691 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4692 4693 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4694 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4695 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4696 b_oth->free_a = PETSC_TRUE; 4697 b_oth->free_ij = PETSC_TRUE; 4698 b_oth->nonew = 0; 4699 4700 ierr = PetscFree(bufj);CHKERRQ(ierr); 4701 if (!startsj || !bufa_ptr){ 4702 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4703 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4704 } else { 4705 *startsj = sstartsj; 4706 *bufa_ptr = bufa; 4707 } 4708 } 4709 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4710 PetscFunctionReturn(0); 4711 } 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "MatGetCommunicationStructs" 4715 /*@C 4716 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4717 4718 Not Collective 4719 4720 Input Parameters: 4721 . A - The matrix in mpiaij format 4722 4723 Output Parameter: 4724 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4725 . colmap - A map from global column index to local index into lvec 4726 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4727 4728 Level: developer 4729 4730 @*/ 4731 #if defined (PETSC_USE_CTABLE) 4732 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4733 #else 4734 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4735 #endif 4736 { 4737 Mat_MPIAIJ *a; 4738 4739 PetscFunctionBegin; 4740 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4741 PetscValidPointer(lvec, 2) 4742 PetscValidPointer(colmap, 3) 4743 PetscValidPointer(multScatter, 4) 4744 a = (Mat_MPIAIJ *) A->data; 4745 if (lvec) *lvec = a->lvec; 4746 if (colmap) *colmap = a->colmap; 4747 if (multScatter) *multScatter = a->Mvctx; 4748 PetscFunctionReturn(0); 4749 } 4750 4751 EXTERN_C_BEGIN 4752 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4753 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4754 EXTERN_C_END 4755 4756 #include "src/mat/impls/dense/mpi/mpidense.h" 4757 4758 #undef __FUNCT__ 4759 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4760 /* 4761 Computes (B'*A')' since computing B*A directly is untenable 4762 4763 n p p 4764 ( ) ( ) ( ) 4765 m ( A ) * n ( B ) = m ( C ) 4766 ( ) ( ) ( ) 4767 4768 */ 4769 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4770 { 4771 PetscErrorCode ierr; 4772 Mat At,Bt,Ct; 4773 4774 PetscFunctionBegin; 4775 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4776 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4777 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4778 ierr = MatDestroy(At);CHKERRQ(ierr); 4779 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4780 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4781 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4782 PetscFunctionReturn(0); 4783 } 4784 4785 #undef __FUNCT__ 4786 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4787 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4788 { 4789 PetscErrorCode ierr; 4790 PetscInt m=A->rmap->n,n=B->cmap->n; 4791 Mat Cmat; 4792 4793 PetscFunctionBegin; 4794 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4795 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4796 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4797 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4798 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4799 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4800 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4801 *C = Cmat; 4802 PetscFunctionReturn(0); 4803 } 4804 4805 /* ----------------------------------------------------------------*/ 4806 #undef __FUNCT__ 4807 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4808 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4809 { 4810 PetscErrorCode ierr; 4811 4812 PetscFunctionBegin; 4813 if (scall == MAT_INITIAL_MATRIX){ 4814 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4815 } 4816 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4817 PetscFunctionReturn(0); 4818 } 4819 4820 EXTERN_C_BEGIN 4821 #if defined(PETSC_HAVE_MUMPS) 4822 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4823 #endif 4824 #if defined(PETSC_HAVE_SUPERLU_DIST) 4825 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4826 #endif 4827 #if defined(PETSC_HAVE_SPOOLES) 4828 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4829 #endif 4830 EXTERN_C_END 4831 4832 /*MC 4833 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4834 4835 Options Database Keys: 4836 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4837 4838 Level: beginner 4839 4840 .seealso: MatCreateMPIAIJ() 4841 M*/ 4842 4843 EXTERN_C_BEGIN 4844 #undef __FUNCT__ 4845 #define __FUNCT__ "MatCreate_MPIAIJ" 4846 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4847 { 4848 Mat_MPIAIJ *b; 4849 PetscErrorCode ierr; 4850 PetscMPIInt size; 4851 4852 PetscFunctionBegin; 4853 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4854 4855 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4856 B->data = (void*)b; 4857 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4858 B->rmap->bs = 1; 4859 B->assembled = PETSC_FALSE; 4860 B->mapping = 0; 4861 4862 B->insertmode = NOT_SET_VALUES; 4863 b->size = size; 4864 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4865 4866 /* build cache for off array entries formed */ 4867 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4868 b->donotstash = PETSC_FALSE; 4869 b->colmap = 0; 4870 b->garray = 0; 4871 b->roworiented = PETSC_TRUE; 4872 4873 /* stuff used for matrix vector multiply */ 4874 b->lvec = PETSC_NULL; 4875 b->Mvctx = PETSC_NULL; 4876 4877 /* stuff for MatGetRow() */ 4878 b->rowindices = 0; 4879 b->rowvalues = 0; 4880 b->getrowactive = PETSC_FALSE; 4881 4882 #if defined(PETSC_HAVE_SPOOLES) 4883 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 4884 "MatGetFactor_mpiaij_spooles", 4885 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 4886 #endif 4887 #if defined(PETSC_HAVE_MUMPS) 4888 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 4889 "MatGetFactor_mpiaij_mumps", 4890 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 4891 #endif 4892 #if defined(PETSC_HAVE_SUPERLU_DIST) 4893 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 4894 "MatGetFactor_mpiaij_superlu_dist", 4895 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 4896 #endif 4897 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4898 "MatStoreValues_MPIAIJ", 4899 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4900 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4901 "MatRetrieveValues_MPIAIJ", 4902 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4903 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4904 "MatGetDiagonalBlock_MPIAIJ", 4905 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4906 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4907 "MatIsTranspose_MPIAIJ", 4908 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4909 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4910 "MatMPIAIJSetPreallocation_MPIAIJ", 4911 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4912 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4913 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4914 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4915 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4916 "MatDiagonalScaleLocal_MPIAIJ", 4917 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4918 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4919 "MatConvert_MPIAIJ_MPICSRPERM", 4920 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4921 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4922 "MatConvert_MPIAIJ_MPICRL", 4923 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4924 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 4925 "MatMatMult_MPIDense_MPIAIJ", 4926 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 4927 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 4928 "MatMatMultSymbolic_MPIDense_MPIAIJ", 4929 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 4930 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 4931 "MatMatMultNumeric_MPIDense_MPIAIJ", 4932 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 4933 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4934 PetscFunctionReturn(0); 4935 } 4936 EXTERN_C_END 4937 4938 #undef __FUNCT__ 4939 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4940 /*@ 4941 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4942 and "off-diagonal" part of the matrix in CSR format. 4943 4944 Collective on MPI_Comm 4945 4946 Input Parameters: 4947 + comm - MPI communicator 4948 . m - number of local rows (Cannot be PETSC_DECIDE) 4949 . n - This value should be the same as the local size used in creating the 4950 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4951 calculated if N is given) For square matrices n is almost always m. 4952 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4953 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4954 . i - row indices for "diagonal" portion of matrix 4955 . j - column indices 4956 . a - matrix values 4957 . oi - row indices for "off-diagonal" portion of matrix 4958 . oj - column indices 4959 - oa - matrix values 4960 4961 Output Parameter: 4962 . mat - the matrix 4963 4964 Level: advanced 4965 4966 Notes: 4967 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4968 4969 The i and j indices are 0 based 4970 4971 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4972 4973 4974 .keywords: matrix, aij, compressed row, sparse, parallel 4975 4976 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4977 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4978 @*/ 4979 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4980 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4981 { 4982 PetscErrorCode ierr; 4983 Mat_MPIAIJ *maij; 4984 4985 PetscFunctionBegin; 4986 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4987 if (i[0]) { 4988 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4989 } 4990 if (oi[0]) { 4991 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4992 } 4993 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4994 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4995 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4996 maij = (Mat_MPIAIJ*) (*mat)->data; 4997 maij->donotstash = PETSC_TRUE; 4998 (*mat)->preallocated = PETSC_TRUE; 4999 5000 (*mat)->rmap->bs = (*mat)->cmap->bs = 1; 5001 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5002 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5003 5004 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5005 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5006 5007 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5008 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5009 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5010 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5011 5012 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5013 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5014 PetscFunctionReturn(0); 5015 } 5016 5017 /* 5018 Special version for direct calls from Fortran 5019 */ 5020 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5021 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5022 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5023 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5024 #endif 5025 5026 /* Change these macros so can be used in void function */ 5027 #undef CHKERRQ 5028 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5029 #undef SETERRQ2 5030 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5031 #undef SETERRQ 5032 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5033 5034 EXTERN_C_BEGIN 5035 #undef __FUNCT__ 5036 #define __FUNCT__ "matsetvaluesmpiaij_" 5037 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5038 { 5039 Mat mat = *mmat; 5040 PetscInt m = *mm, n = *mn; 5041 InsertMode addv = *maddv; 5042 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5043 PetscScalar value; 5044 PetscErrorCode ierr; 5045 5046 MatPreallocated(mat); 5047 if (mat->insertmode == NOT_SET_VALUES) { 5048 mat->insertmode = addv; 5049 } 5050 #if defined(PETSC_USE_DEBUG) 5051 else if (mat->insertmode != addv) { 5052 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5053 } 5054 #endif 5055 { 5056 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5057 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5058 PetscTruth roworiented = aij->roworiented; 5059 5060 /* Some Variables required in the macro */ 5061 Mat A = aij->A; 5062 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5063 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5064 MatScalar *aa = a->a; 5065 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5066 Mat B = aij->B; 5067 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5068 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5069 MatScalar *ba = b->a; 5070 5071 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5072 PetscInt nonew = a->nonew; 5073 MatScalar *ap1,*ap2; 5074 5075 PetscFunctionBegin; 5076 for (i=0; i<m; i++) { 5077 if (im[i] < 0) continue; 5078 #if defined(PETSC_USE_DEBUG) 5079 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5080 #endif 5081 if (im[i] >= rstart && im[i] < rend) { 5082 row = im[i] - rstart; 5083 lastcol1 = -1; 5084 rp1 = aj + ai[row]; 5085 ap1 = aa + ai[row]; 5086 rmax1 = aimax[row]; 5087 nrow1 = ailen[row]; 5088 low1 = 0; 5089 high1 = nrow1; 5090 lastcol2 = -1; 5091 rp2 = bj + bi[row]; 5092 ap2 = ba + bi[row]; 5093 rmax2 = bimax[row]; 5094 nrow2 = bilen[row]; 5095 low2 = 0; 5096 high2 = nrow2; 5097 5098 for (j=0; j<n; j++) { 5099 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5100 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5101 if (in[j] >= cstart && in[j] < cend){ 5102 col = in[j] - cstart; 5103 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5104 } else if (in[j] < 0) continue; 5105 #if defined(PETSC_USE_DEBUG) 5106 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5107 #endif 5108 else { 5109 if (mat->was_assembled) { 5110 if (!aij->colmap) { 5111 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5112 } 5113 #if defined (PETSC_USE_CTABLE) 5114 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5115 col--; 5116 #else 5117 col = aij->colmap[in[j]] - 1; 5118 #endif 5119 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5120 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5121 col = in[j]; 5122 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5123 B = aij->B; 5124 b = (Mat_SeqAIJ*)B->data; 5125 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5126 rp2 = bj + bi[row]; 5127 ap2 = ba + bi[row]; 5128 rmax2 = bimax[row]; 5129 nrow2 = bilen[row]; 5130 low2 = 0; 5131 high2 = nrow2; 5132 bm = aij->B->rmap->n; 5133 ba = b->a; 5134 } 5135 } else col = in[j]; 5136 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5137 } 5138 } 5139 } else { 5140 if (!aij->donotstash) { 5141 if (roworiented) { 5142 if (ignorezeroentries && v[i*n] == 0.0) continue; 5143 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5144 } else { 5145 if (ignorezeroentries && v[i] == 0.0) continue; 5146 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5147 } 5148 } 5149 } 5150 }} 5151 PetscFunctionReturnVoid(); 5152 } 5153 EXTERN_C_END 5154 5155