1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap.n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap.n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap.n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the 
local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap.n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 311 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = 
DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap.n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 415 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 474 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap.range; 567 PetscInt 
*nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 
669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap.n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on its 
way */ 760 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef 
__FUNCT__ 846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = A->nz + 
B->nz; 929 if (!rank) { 930 header[0] = MAT_FILE_COOKIE; 931 header[1] = mat->rmap.N; 932 header[2] = mat->cmap.N; 933 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 936 /* get largest number of rows any processor has */ 937 rlen = mat->rmap.n; 938 range = mat->rmap.range; 939 for (i=1; i<size; i++) { 940 rlen = PetscMax(rlen,range[i+1] - range[i]); 941 } 942 } else { 943 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 944 rlen = mat->rmap.n; 945 } 946 947 /* load up the local row counts */ 948 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 949 for (i=0; i<mat->rmap.n; i++) { 950 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 951 } 952 953 /* store the row lengths to the file */ 954 if (!rank) { 955 MPI_Status status; 956 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 957 for (i=1; i<size; i++) { 958 rlen = range[i+1] - range[i]; 959 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 960 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 961 } 962 } else { 963 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 964 } 965 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 966 967 /* load up the local column indices */ 968 nzmax = nz; /* )th processor needs space a largest processor needs */ 969 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 970 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 971 cnt = 0; 972 for (i=0; i<mat->rmap.n; i++) { 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ( (col = garray[B->j[j]]) > cstart) break; 975 column_indices[cnt++] = col; 976 } 977 for (k=A->i[i]; k<A->i[i+1]; k++) { 978 column_indices[cnt++] = A->j[k] + cstart; 979 } 980 for (; j<B->i[i+1]; j++) { 981 column_indices[cnt++] = garray[B->j[j]]; 982 } 983 } 984 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 985 986 /* store the column indices to the file */ 987 if (!rank) { 988 MPI_Status status; 989 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 990 for (i=1; i<size; i++) { 991 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 992 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 993 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 994 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 995 } 996 } else { 997 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 998 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1001 1002 /* load up the local column values */ 1003 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1004 cnt = 0; 1005 for (i=0; i<mat->rmap.n; i++) { 1006 for (j=B->i[i]; j<B->i[i+1]; j++) { 1007 if ( garray[B->j[j]] > cstart) break; 1008 column_values[cnt++] = B->a[j]; 1009 } 1010 for (k=A->i[i]; k<A->i[i+1]; k++) { 1011 column_values[cnt++] = A->a[k]; 1012 } 1013 for (; j<B->i[i+1]; j++) { 
1014 column_values[cnt++] = B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if 
(inodes) { 1080 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } 1088 } else if (isbinary) { 1089 if (size == 1) { 1090 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1091 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1092 } else { 1093 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } else if (isdraw) { 1097 PetscDraw draw; 1098 PetscTruth isnull; 1099 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1100 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1101 } 1102 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 /* assemble the entire matrix onto first processor. */ 1108 Mat A; 1109 Mat_SeqAIJ *Aloc; 1110 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1111 MatScalar *a; 1112 1113 if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead"); 1114 1115 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1116 if (!rank) { 1117 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1118 } else { 1119 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1120 } 1121 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1122 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1123 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1125 1126 /* copy over the A part */ 1127 Aloc = (Mat_SeqAIJ*)aij->A->data; 1128 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1129 row = mat->rmap.rstart; 1130 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 1131 for (i=0; i<m; i++) { 1132 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1133 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1134 } 1135 aj = Aloc->j; 1136 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 1137 1138 /* copy over the B part */ 1139 Aloc = (Mat_SeqAIJ*)aij->B->data; 1140 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1141 row = mat->rmap.rstart; 1142 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1143 ct = cols; 1144 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1148 } 1149 ierr = PetscFree(ct);CHKERRQ(ierr); 1150 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1151 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1152 /* 1153 Everyone has to call to draw the matrix since the graphics waits are 1154 synchronized across all processors that share the PetscDraw object 1155 */ 1156 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1157 if (!rank) { 1158 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1159 ierr = 
MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1160 } 1161 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1162 ierr = MatDestroy(A);CHKERRQ(ierr); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatView_MPIAIJ" 1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscErrorCode ierr; 1172 PetscTruth iascii,isdraw,issocket,isbinary; 1173 1174 PetscFunctionBegin; 1175 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1176 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1177 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1178 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1179 if (iascii || isdraw || isbinary || issocket) { 1180 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRelax_MPIAIJ" 1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscErrorCode ierr; 1193 Vec bb1; 1194 1195 PetscFunctionBegin; 1196 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1197 1198 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1199 if (flag & SOR_ZERO_INITIAL_GUESS) { 1200 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1201 its--; 1202 } 1203 1204 while (its--) { 1205 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 1208 /* update rhs: bb1 = bb - B*x */ 1209 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1211 1212 /* local sweep */ 1213 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1214 } 1215 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1216 if (flag & SOR_ZERO_INITIAL_GUESS) { 1217 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1218 its--; 1219 } 1220 while (its--) { 1221 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 1224 /* update rhs: bb1 = bb - B*x */ 1225 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1227 1228 /* local sweep */ 1229 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1230 } 1231 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1232 if (flag & SOR_ZERO_INITIAL_GUESS) { 1233 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1234 its--; 1235 } 1236 while (its--) { 1237 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* update rhs: bb1 = bb - B*x */ 1241 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1242 ierr = 
(*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1243 1244 /* local sweep */ 1245 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1246 } 1247 } else { 1248 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1249 } 1250 1251 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatPermute_MPIAIJ" 1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1258 { 1259 MPI_Comm comm,pcomm; 1260 PetscInt first,local_size,nrows,*rows; 1261 int ntids; 1262 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1267 /* make a collective version of 'rowp' */ 1268 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1269 if (pcomm==comm) { 1270 crowp = rowp; 1271 } else { 1272 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1273 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1274 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1275 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1276 } 1277 /* collect the global row permutation and invert it */ 1278 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1279 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1280 if (pcomm!=comm) { 1281 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1282 } 1283 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1284 /* get the local target indices */ 1285 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1286 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1287 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1288 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1289 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1290 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1291 /* the column permutation is so much easier; 1292 make a local version of 'colp' and invert it */ 1293 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1294 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1295 if (ntids==1) { 1296 lcolp = colp; 1297 } else { 1298 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1299 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1300 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1301 } 1302 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1303 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1304 if (ntids>1) { 1305 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1306 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1307 } 1308 /* now we just get the submatrix */ 1309 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1310 /* clean up */ 1311 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1312 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1319 { 1320 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1321 Mat A = mat->A,B = mat->B; 1322 PetscErrorCode ierr; 1323 PetscReal isend[5],irecv[5]; 1324 1325 PetscFunctionBegin; 1326 info->block_size = 1.0; 1327 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1328 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1329 isend[3] = info->memory; isend[4] = info->mallocs; 1330 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 
1331 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1332 isend[3] += info->memory; isend[4] += info->mallocs; 1333 if (flag == MAT_LOCAL) { 1334 info->nz_used = isend[0]; 1335 info->nz_allocated = isend[1]; 1336 info->nz_unneeded = isend[2]; 1337 info->memory = isend[3]; 1338 info->mallocs = isend[4]; 1339 } else if (flag == MAT_GLOBAL_MAX) { 1340 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1341 info->nz_used = irecv[0]; 1342 info->nz_allocated = irecv[1]; 1343 info->nz_unneeded = irecv[2]; 1344 info->memory = irecv[3]; 1345 info->mallocs = irecv[4]; 1346 } else if (flag == MAT_GLOBAL_SUM) { 1347 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1348 info->nz_used = irecv[0]; 1349 info->nz_allocated = irecv[1]; 1350 info->nz_unneeded = irecv[2]; 1351 info->memory = irecv[3]; 1352 info->mallocs = irecv[4]; 1353 } 1354 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1355 info->fill_ratio_needed = 0; 1356 info->factor_mallocs = 0; 1357 info->rows_global = (double)matin->rmap.N; 1358 info->columns_global = (double)matin->cmap.N; 1359 info->rows_local = (double)matin->rmap.n; 1360 info->columns_local = (double)matin->cmap.N; 1361 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatSetOption_MPIAIJ" 1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1368 { 1369 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 switch (op) { 1374 case MAT_NEW_NONZERO_LOCATIONS: 1375 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1376 case MAT_KEEP_ZEROED_ROWS: 1377 case MAT_NEW_NONZERO_LOCATION_ERR: 1378 case MAT_USE_INODES: 1379 case MAT_IGNORE_ZERO_ENTRIES: 1380 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1381 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1382 break; 1383 case MAT_ROW_ORIENTED: 1384 a->roworiented = flg; 1385 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1386 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1387 break; 1388 case MAT_NEW_DIAGONALS: 1389 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1390 break; 1391 case MAT_IGNORE_OFF_PROC_ENTRIES: 1392 a->donotstash = PETSC_TRUE; 1393 break; 1394 case MAT_SYMMETRIC: 1395 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1396 break; 1397 case MAT_STRUCTURALLY_SYMMETRIC: 1398 case MAT_HERMITIAN: 1399 case MAT_SYMMETRY_ETERNAL: 1400 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1401 break; 1402 default: 1403 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetRow_MPIAIJ" 1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1411 { 1412 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1413 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1414 PetscErrorCode ierr; 1415 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1416 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1417 PetscInt *cmap,*idx_p; 1418 1419 PetscFunctionBegin; 1420 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1421 mat->getrowactive = PETSC_TRUE; 1422 1423 if (!mat->rowvalues && (idx || v)) { 1424 /* 1425 allocate enough space to hold information from the longest row. 
1426 */ 1427 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1428 PetscInt max = 1,tmp; 1429 for (i=0; i<matin->rmap.n; i++) { 1430 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1431 if (max < tmp) { max = tmp; } 1432 } 1433 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1434 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1435 } 1436 1437 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1438 lrow = row - rstart; 1439 1440 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1441 if (!v) {pvA = 0; pvB = 0;} 1442 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1443 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1444 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1445 nztot = nzA + nzB; 1446 1447 cmap = mat->garray; 1448 if (v || idx) { 1449 if (nztot) { 1450 /* Sort by increasing column numbers, assuming A and B already sorted */ 1451 PetscInt imark = -1; 1452 if (v) { 1453 *v = v_p = mat->rowvalues; 1454 for (i=0; i<nzB; i++) { 1455 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1456 else break; 1457 } 1458 imark = i; 1459 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1460 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1461 } 1462 if (idx) { 1463 *idx = idx_p = mat->rowindices; 1464 if (imark > -1) { 1465 for (i=0; i<imark; i++) { 1466 idx_p[i] = cmap[cworkB[i]]; 1467 } 1468 } else { 1469 for (i=0; i<nzB; i++) { 1470 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1471 else break; 1472 } 1473 imark = i; 1474 } 1475 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1476 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1477 } 1478 } else { 1479 if (idx) *idx = 0; 1480 if (v) *v = 0; 1481 } 1482 } 1483 *nz = nztot; 1484 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1485 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1492 { 1493 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1494 1495 PetscFunctionBegin; 1496 if (!aij->getrowactive) { 1497 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1498 } 1499 aij->getrowactive = PETSC_FALSE; 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatNorm_MPIAIJ" 1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1506 { 1507 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1508 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1509 PetscErrorCode ierr; 1510 PetscInt i,j,cstart = mat->cmap.rstart; 1511 PetscReal sum = 0.0; 1512 MatScalar *v; 1513 1514 PetscFunctionBegin; 1515 if (aij->size == 1) { 1516 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1517 } else { 1518 if (type == NORM_FROBENIUS) { 1519 v = amat->a; 1520 for (i=0; i<amat->nz; i++) { 1521 #if defined(PETSC_USE_COMPLEX) 1522 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1523 #else 1524 sum += (*v)*(*v); v++; 1525 #endif 1526 } 1527 v = bmat->a; 1528 for (i=0; i<bmat->nz; i++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1531 #else 1532 sum += (*v)*(*v); v++; 1533 #endif 1534 } 1535 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1536 *norm = 
sqrt(*norm); 1537 } else if (type == NORM_1) { /* max column norm */ 1538 PetscReal *tmp,*tmp2; 1539 PetscInt *jj,*garray = aij->garray; 1540 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1541 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1542 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1543 *norm = 0.0; 1544 v = amat->a; jj = amat->j; 1545 for (j=0; j<amat->nz; j++) { 1546 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1547 } 1548 v = bmat->a; jj = bmat->j; 1549 for (j=0; j<bmat->nz; j++) { 1550 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1551 } 1552 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1553 for (j=0; j<mat->cmap.N; j++) { 1554 if (tmp2[j] > *norm) *norm = tmp2[j]; 1555 } 1556 ierr = PetscFree(tmp);CHKERRQ(ierr); 1557 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1558 } else if (type == NORM_INFINITY) { /* max row norm */ 1559 PetscReal ntemp = 0.0; 1560 for (j=0; j<aij->A->rmap.n; j++) { 1561 v = amat->a + amat->i[j]; 1562 sum = 0.0; 1563 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1564 sum += PetscAbsScalar(*v); v++; 1565 } 1566 v = bmat->a + bmat->i[j]; 1567 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1568 sum += PetscAbsScalar(*v); v++; 1569 } 1570 if (sum > ntemp) ntemp = sum; 1571 } 1572 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1573 } else { 1574 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_MPIAIJ" 1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1583 { 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1585 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1586 PetscErrorCode ierr; 1587 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1588 PetscInt cstart=A->cmap.rstart,ncol; 1589 Mat B; 1590 MatScalar *array; 1591 1592 PetscFunctionBegin; 1593 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 1595 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1596 ai = Aloc->i; aj = Aloc->j; 1597 bi = Bloc->i; bj = Bloc->j; 1598 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1599 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1600 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1601 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1602 for (i=0; i<ai[ma]; i++){ 1603 d_nnz[aj[i]] ++; 1604 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1605 } 1606 1607 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1608 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1609 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1610 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1611 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1612 } else { 1613 B = *matout; 1614 } 1615 1616 /* copy over the A part */ 1617 array = Aloc->a; 1618 row = A->rmap.rstart; 1619 for (i=0; i<ma; i++) { 1620 ncol = ai[i+1]-ai[i]; 1621 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1622 row++; array += ncol; aj += ncol; 1623 } 1624 aj = Aloc->j; 1625 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1626 1627 /* copy over the B part */ 1628 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1629 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1630 array = Bloc->a; 1631 row = A->rmap.rstart; 1632 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1633 cols_tmp = cols; 1634 for (i=0; i<mb; i++) { 1635 ncol = bi[i+1]-bi[i]; 1636 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1637 row++; array += ncol; cols_tmp += ncol; 1638 } 1639 ierr = PetscFree(cols);CHKERRQ(ierr); 1640 1641 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1644 *matout = B; 1645 } else { 1646 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1653 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1654 { 1655 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1656 Mat a = aij->A,b = aij->B; 1657 PetscErrorCode ierr; 1658 PetscInt s1,s2,s3; 1659 1660 PetscFunctionBegin; 1661 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1662 if (rr) { 1663 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1664 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1665 /* Overlap communication with computation. */ 1666 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 } 1668 if (ll) { 1669 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1670 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1671 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1672 } 1673 /* scale the diagonal block */ 1674 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1675 1676 if (rr) { 1677 /* Do a scatter end and then right scale the off-diagonal block */ 1678 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1680 } 1681 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1687 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1694 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1699 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1700 { 1701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1702 PetscErrorCode ierr; 1703 1704 PetscFunctionBegin; 1705 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatEqual_MPIAIJ" 1711 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1712 { 1713 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1714 Mat a,b,c,d; 1715 PetscTruth flg; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 a = matA->A; b = matA->B; 1720 c = matB->A; d = matB->B; 1721 1722 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1723 if (flg) { 1724 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1725 } 1726 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCopy_MPIAIJ" 1732 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1733 { 
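  /* An MPIAIJ matrix stores its local rows as two SeqAIJ pieces: the square
     "diagonal" block a->A (the columns owned by this process) and the "off-diagonal"
     block a->B (all remaining columns, kept with compressed column indices).  When
     both matrices have the same nonzero pattern and the same copy implementation,
     the copy below is done piecewise on these two blocks. */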
1734 PetscErrorCode ierr; 1735 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1736 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1737 1738 PetscFunctionBegin; 1739 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1740 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1741 /* because of the column compression in the off-processor part of the matrix a->B, 1742 the number of columns in a->B and b->B may be different, hence we cannot call 1743 the MatCopy() directly on the two parts. If need be, we can provide a more 1744 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1745 then copying the submatrices */ 1746 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1747 } else { 1748 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1749 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1756 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1757 { 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #include "petscblaslapack.h" 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatAXPY_MPIAIJ" 1768 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1769 { 1770 PetscErrorCode ierr; 1771 PetscInt i; 1772 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1773 PetscBLASInt bnz,one=1; 1774 Mat_SeqAIJ *x,*y; 1775 1776 PetscFunctionBegin; 1777 if (str == SAME_NONZERO_PATTERN) { 1778 PetscScalar alpha = a; 1779 x = (Mat_SeqAIJ *)xx->A->data; 1780 y = (Mat_SeqAIJ *)yy->A->data; 1781 bnz = PetscBLASIntCast(x->nz); 1782 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1783 x = (Mat_SeqAIJ *)xx->B->data; 1784 y = (Mat_SeqAIJ *)yy->B->data; 1785 bnz = PetscBLASIntCast(x->nz); 1786 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1787 } else if (str == SUBSET_NONZERO_PATTERN) { 1788 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1789 1790 x = (Mat_SeqAIJ *)xx->B->data; 1791 y = (Mat_SeqAIJ *)yy->B->data; 1792 if (y->xtoy && y->XtoY != xx->B) { 1793 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1794 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1795 } 1796 if (!y->xtoy) { /* get xtoy */ 1797 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1798 y->XtoY = xx->B; 1799 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1800 } 1801 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1802 } else { 1803 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatConjugate_MPIAIJ" 1812 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1813 { 1814 #if defined(PETSC_USE_COMPLEX) 1815 PetscErrorCode ierr; 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1817 1818 PetscFunctionBegin; 1819 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1820 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1821 #else 1822 PetscFunctionBegin; 1823 #endif 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatRealPart_MPIAIJ" 1829 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1830 { 1831 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1832 PetscErrorCode ierr; 1833 1834 PetscFunctionBegin; 1835 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1836 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1842 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1843 { 1844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1849 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #ifdef PETSC_HAVE_PBGL 1854 1855 #include <boost/parallel/mpi/bsp_process_group.hpp> 1856 #include <boost/graph/distributed/ilu_default_graph.hpp> 1857 #include <boost/graph/distributed/ilu_0_block.hpp> 1858 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1859 #include <boost/graph/distributed/petsc/interface.hpp> 1860 #include <boost/multi_array.hpp> 1861 #include <boost/parallel/distributed_property_map.hpp> 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1865 /* 1866 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1867 */ 1868 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1869 { 1870 namespace petsc = boost::distributed::petsc; 1871 1872 namespace graph_dist = boost::graph::distributed; 1873 using boost::graph::distributed::ilu_default::process_group_type; 1874 using boost::graph::ilu_permuted; 1875 1876 PetscTruth row_identity, col_identity; 1877 PetscContainer c; 1878 PetscInt m, n, M, N; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1883 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1884 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1885 if (!row_identity || !col_identity) { 1886 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1887 } 1888 1889 process_group_type pg; 1890 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1891 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1892 lgraph_type& level_graph = *lgraph_p; 1893 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1894 1895 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1896 ilu_permuted(level_graph); 1897 1898 /* put together the new matrix */ 1899 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1900 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1901 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1902 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1903 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1904 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1905 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1906 (*fact)->factor = MAT_FACTOR_LU; 1907 1908 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1909 ierr = PetscContainerSetPointer(c, lgraph_p); 1910 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1916 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1917 { 1918 PetscFunctionBegin; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatSolve_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 
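/*
   The symbolic factorization above attaches the distributed level graph to the
   factored matrix in a PetscContainer composed under the key "graph".  The solve
   below queries that container, wraps the arrays of the right-hand side and the
   solution vector as Boost property maps over the graph vertices, and then calls
   ilu_set_solve() to apply the ILU factors.
*/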
1927 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1928 { 1929 namespace graph_dist = boost::graph::distributed; 1930 1931 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1932 lgraph_type* lgraph_p; 1933 PetscContainer c; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1938 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1939 ierr = VecCopy(b, x); CHKERRQ(ierr); 1940 1941 PetscScalar* array_x; 1942 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1943 PetscInt sx; 1944 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1945 1946 PetscScalar* array_b; 1947 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1948 PetscInt sb; 1949 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1950 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1955 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1956 ref_x(array_x, boost::extents[num_vertices(graph)]); 1957 1958 typedef boost::iterator_property_map<array_ref_type::iterator, 1959 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1960 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1961 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1962 1963 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1964 1965 PetscFunctionReturn(0); 1966 } 1967 #endif 1968 1969 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1970 PetscInt nzlocal,nsends,nrecvs; 1971 PetscMPIInt *send_rank; 1972 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1973 PetscScalar *sbuf_a,**rbuf_a; 1974 PetscErrorCode (*MatDestroy)(Mat); 1975 } Mat_Redundant; 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1979 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1980 { 1981 PetscErrorCode ierr; 1982 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1983 PetscInt i; 1984 1985 PetscFunctionBegin; 1986 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1987 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1988 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1989 for (i=0; i<redund->nrecvs; i++){ 1990 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1991 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1992 } 1993 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1994 ierr = PetscFree(redund);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatDestroy_MatRedundant" 2000 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2001 { 2002 PetscErrorCode ierr; 2003 PetscContainer container; 2004 Mat_Redundant *redund=PETSC_NULL; 2005 2006 PetscFunctionBegin; 2007 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2008 if (container) { 2009 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2010 } else { 2011 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2012 } 2013 A->ops->destroy = redund->MatDestroy; 2014 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2015 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2016 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2022 PetscErrorCode 
MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2023 { 2024 PetscMPIInt rank,size; 2025 MPI_Comm comm=((PetscObject)mat)->comm; 2026 PetscErrorCode ierr; 2027 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2028 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2029 PetscInt *rowrange=mat->rmap.range; 2030 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2031 Mat A=aij->A,B=aij->B,C=*matredundant; 2032 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2033 PetscScalar *sbuf_a; 2034 PetscInt nzlocal=a->nz+b->nz; 2035 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2036 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 2037 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2038 MatScalar *aworkA,*aworkB; 2039 PetscScalar *vals; 2040 PetscMPIInt tag1,tag2,tag3,imdex; 2041 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2042 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2043 MPI_Status recv_status,*send_status; 2044 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2045 PetscInt **rbuf_j=PETSC_NULL; 2046 PetscScalar **rbuf_a=PETSC_NULL; 2047 Mat_Redundant *redund=PETSC_NULL; 2048 PetscContainer container; 2049 2050 PetscFunctionBegin; 2051 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2052 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2053 2054 if (reuse == MAT_REUSE_MATRIX) { 2055 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2056 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2057 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2058 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2059 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2060 if (container) { 2061 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2062 } else { 2063 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2064 } 2065 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2066 2067 nsends = redund->nsends; 2068 nrecvs = redund->nrecvs; 2069 send_rank = redund->send_rank; recv_rank = send_rank + size; 2070 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2071 sbuf_j = redund->sbuf_j; 2072 sbuf_a = redund->sbuf_a; 2073 rbuf_j = redund->rbuf_j; 2074 rbuf_a = redund->rbuf_a; 2075 } 2076 2077 if (reuse == MAT_INITIAL_MATRIX){ 2078 PetscMPIInt subrank,subsize; 2079 PetscInt nleftover,np_subcomm; 2080 /* get the destination processors' id send_rank, nsends and nrecvs */ 2081 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2082 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2083 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2084 recv_rank = send_rank + size; 2085 np_subcomm = size/nsubcomm; 2086 nleftover = size - nsubcomm*np_subcomm; 2087 nsends = 0; nrecvs = 0; 2088 for (i=0; i<size; i++){ /* i=rank*/ 2089 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2090 send_rank[nsends] = i; nsends++; 2091 recv_rank[nrecvs++] = i; 2092 } 2093 } 2094 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2095 i = size-nleftover-1; 2096 j = 0; 2097 while (j < nsubcomm - nleftover){ 2098 send_rank[nsends++] = i; 2099 i--; j++; 2100 } 2101 } 2102 2103 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2104 for (i=0; i<nleftover; i++){ 2105 recv_rank[nrecvs++] = size-nleftover+i; 2106 } 2107 } 2108 2109 /* allocate sbuf_j, sbuf_a */ 2110 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2111 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2112 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2113 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2114 2115 /* copy mat's local entries into the buffers */ 2116 if (reuse == MAT_INITIAL_MATRIX){ 2117 rownz_max = 0; 2118 rptr = sbuf_j; 2119 cols = sbuf_j + rend-rstart + 1; 2120 vals = sbuf_a; 2121 rptr[0] = 0; 2122 for (i=0; i<rend-rstart; i++){ 2123 row = i + rstart; 2124 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2125 ncols = nzA + nzB; 2126 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2127 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2128 /* load the column indices for this row into cols */ 2129 lwrite = 0; 2130 for (l=0; l<nzB; l++) { 2131 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2132 vals[lwrite] = aworkB[l]; 2133 cols[lwrite++] = ctmp; 2134 } 2135 } 2136 for (l=0; l<nzA; l++){ 2137 vals[lwrite] = aworkA[l]; 2138 cols[lwrite++] = cstart + cworkA[l]; 2139 } 2140 for (l=0; l<nzB; l++) { 2141 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2142 vals[lwrite] = aworkB[l]; 2143 cols[lwrite++] = ctmp; 2144 } 2145 } 2146 vals += ncols; 2147 cols += ncols; 2148 rptr[i+1] = rptr[i] + ncols; 2149 if (rownz_max < ncols) rownz_max = ncols; 2150 } 2151 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2152 } else { /* only copy matrix values into sbuf_a */ 2153 rptr = sbuf_j; 2154 vals = sbuf_a; 2155 rptr[0] = 0; 2156 for (i=0; i<rend-rstart; i++){ 2157 row = i + rstart; 2158 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2159 ncols = nzA + nzB; 2160 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2161 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2162 lwrite = 0; 2163 for (l=0; l<nzB; l++) { 2164 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2165 } 2166 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2167 for (l=0; l<nzB; l++) { 
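        /* entries of the off-diagonal block whose global column is >= cend are written
           last, so the value ordering matches the column ordering (columns < cstart,
           diagonal block, columns >= cend) used when the pattern was first packed */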
2168 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2169 } 2170 vals += ncols; 2171 rptr[i+1] = rptr[i] + ncols; 2172 } 2173 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2174 2175 /* send nzlocal to others, and recv other's nzlocal */ 2176 /*--------------------------------------------------*/ 2177 if (reuse == MAT_INITIAL_MATRIX){ 2178 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2179 s_waits2 = s_waits3 + nsends; 2180 s_waits1 = s_waits2 + nsends; 2181 r_waits1 = s_waits1 + nsends; 2182 r_waits2 = r_waits1 + nrecvs; 2183 r_waits3 = r_waits2 + nrecvs; 2184 } else { 2185 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2186 r_waits3 = s_waits3 + nsends; 2187 } 2188 2189 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2190 if (reuse == MAT_INITIAL_MATRIX){ 2191 /* get new tags to keep the communication clean */ 2192 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2193 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2194 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2195 rbuf_nz = sbuf_nz + nsends; 2196 2197 /* post receives of other's nzlocal */ 2198 for (i=0; i<nrecvs; i++){ 2199 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2200 } 2201 /* send nzlocal to others */ 2202 for (i=0; i<nsends; i++){ 2203 sbuf_nz[i] = nzlocal; 2204 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2205 } 2206 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2207 count = nrecvs; 2208 while (count) { 2209 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2210 recv_rank[imdex] = recv_status.MPI_SOURCE; 2211 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2212 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2213 2214 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2215 rbuf_nz[imdex] += i + 2; 2216 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2217 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2218 count--; 2219 } 2220 /* wait on sends of nzlocal */ 2221 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2222 /* send mat->i,j to others, and recv from other's */ 2223 /*------------------------------------------------*/ 2224 for (i=0; i<nsends; i++){ 2225 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2226 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2227 } 2228 /* wait on receives of mat->i,j */ 2229 /*------------------------------*/ 2230 count = nrecvs; 2231 while (count) { 2232 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2233 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2234 count--; 2235 } 2236 /* wait on sends of mat->i,j */ 2237 /*---------------------------*/ 2238 if (nsends) { 2239 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2240 } 2241 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2242 2243 /* post receives, send and receive mat->a */ 2244 /*----------------------------------------*/ 
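  /* Communication pattern used by this routine: tag1 carries each sender's nonzero
     count (nzlocal), tag2 carries the packed row pointers plus global column indices
     (sbuf_j), and tag3 below carries the numerical values (sbuf_a).  When the matrix
     is reused only the tag3 exchange is repeated, since the index buffers are kept in
     the Mat_Redundant container attached to the redundant matrix. */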
2245 for (imdex=0; imdex<nrecvs; imdex++) { 2246 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2247 } 2248 for (i=0; i<nsends; i++){ 2249 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2250 } 2251 count = nrecvs; 2252 while (count) { 2253 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2254 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2255 count--; 2256 } 2257 if (nsends) { 2258 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2259 } 2260 2261 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2262 2263 /* create redundant matrix */ 2264 /*-------------------------*/ 2265 if (reuse == MAT_INITIAL_MATRIX){ 2266 /* compute rownz_max for preallocation */ 2267 for (imdex=0; imdex<nrecvs; imdex++){ 2268 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2269 rptr = rbuf_j[imdex]; 2270 for (i=0; i<j; i++){ 2271 ncols = rptr[i+1] - rptr[i]; 2272 if (rownz_max < ncols) rownz_max = ncols; 2273 } 2274 } 2275 2276 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2277 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2278 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2279 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2280 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2281 } else { 2282 C = *matredundant; 2283 } 2284 2285 /* insert local matrix entries */ 2286 rptr = sbuf_j; 2287 cols = sbuf_j + rend-rstart + 1; 2288 vals = sbuf_a; 2289 for (i=0; i<rend-rstart; i++){ 2290 row = i + rstart; 2291 ncols = rptr[i+1] - rptr[i]; 2292 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2293 vals += ncols; 2294 cols += ncols; 2295 } 2296 /* insert received matrix entries */ 2297 for (imdex=0; imdex<nrecvs; imdex++){ 2298 rstart = rowrange[recv_rank[imdex]]; 2299 rend = rowrange[recv_rank[imdex]+1]; 2300 rptr = rbuf_j[imdex]; 2301 cols = rbuf_j[imdex] + rend-rstart + 1; 2302 vals = rbuf_a[imdex]; 2303 for (i=0; i<rend-rstart; i++){ 2304 row = i + rstart; 2305 ncols = rptr[i+1] - rptr[i]; 2306 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2307 vals += ncols; 2308 cols += ncols; 2309 } 2310 } 2311 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2313 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2314 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2315 if (reuse == MAT_INITIAL_MATRIX){ 2316 PetscContainer container; 2317 *matredundant = C; 2318 /* create a supporting struct and attach it to C for reuse */ 2319 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2320 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2321 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2322 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2323 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2324 2325 redund->nzlocal = nzlocal; 2326 redund->nsends = nsends; 2327 redund->nrecvs = nrecvs; 2328 redund->send_rank = send_rank; 2329 redund->sbuf_nz = sbuf_nz; 2330 redund->sbuf_j = sbuf_j; 2331 redund->sbuf_a = sbuf_a; 2332 redund->rbuf_j = 
rbuf_j; 2333 redund->rbuf_a = rbuf_a; 2334 2335 redund->MatDestroy = C->ops->destroy; 2336 C->ops->destroy = MatDestroy_MatRedundant; 2337 } 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2343 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2346 PetscErrorCode ierr; 2347 PetscInt i,*idxb = 0; 2348 PetscScalar *va,*vb; 2349 Vec vtmp; 2350 2351 PetscFunctionBegin; 2352 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2353 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2354 if (idx) { 2355 for (i=0; i<A->cmap.n; i++) { 2356 if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart; 2357 } 2358 } 2359 2360 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 2361 if (idx) { 2362 ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2363 } 2364 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2365 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2366 2367 for (i=0; i<A->rmap.n; i++){ 2368 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2369 va[i] = vb[i]; 2370 if (idx) idx[i] = a->garray[idxb[i]]; 2371 } 2372 } 2373 2374 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2375 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2376 if (idxb) { 2377 ierr = PetscFree(idxb);CHKERRQ(ierr); 2378 } 2379 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2385 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2386 { 2387 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2388 PetscInt n = A->rmap.n; 2389 PetscInt cstart = A->cmap.rstart; 2390 PetscInt *cmap = mat->garray; 2391 PetscInt *diagIdx, *offdiagIdx; 2392 Vec diagV, offdiagV; 2393 PetscScalar *a, *diagA, *offdiagA; 2394 PetscInt r; 2395 PetscErrorCode ierr; 2396 2397 PetscFunctionBegin; 2398 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2399 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2400 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2401 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2402 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2403 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2404 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2405 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2406 for(r = 0; r < n; ++r) { 2407 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2408 a[r] = diagA[r]; 2409 idx[r] = cstart + diagIdx[r]; 2410 } else { 2411 a[r] = offdiagA[r]; 2412 idx[r] = cmap[offdiagIdx[r]]; 2413 } 2414 } 2415 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2416 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2417 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2418 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2419 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2420 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2426 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2427 { 2428 PetscErrorCode ierr; 2429 2430 PetscFunctionBegin; 2431 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 /* -------------------------------------------------------------------*/ 2436 static struct _MatOps MatOps_Values = 
{MatSetValues_MPIAIJ, 2437 MatGetRow_MPIAIJ, 2438 MatRestoreRow_MPIAIJ, 2439 MatMult_MPIAIJ, 2440 /* 4*/ MatMultAdd_MPIAIJ, 2441 MatMultTranspose_MPIAIJ, 2442 MatMultTransposeAdd_MPIAIJ, 2443 #ifdef PETSC_HAVE_PBGL 2444 MatSolve_MPIAIJ, 2445 #else 2446 0, 2447 #endif 2448 0, 2449 0, 2450 /*10*/ 0, 2451 0, 2452 0, 2453 MatRelax_MPIAIJ, 2454 MatTranspose_MPIAIJ, 2455 /*15*/ MatGetInfo_MPIAIJ, 2456 MatEqual_MPIAIJ, 2457 MatGetDiagonal_MPIAIJ, 2458 MatDiagonalScale_MPIAIJ, 2459 MatNorm_MPIAIJ, 2460 /*20*/ MatAssemblyBegin_MPIAIJ, 2461 MatAssemblyEnd_MPIAIJ, 2462 0, 2463 MatSetOption_MPIAIJ, 2464 MatZeroEntries_MPIAIJ, 2465 /*25*/ MatZeroRows_MPIAIJ, 2466 0, 2467 #ifdef PETSC_HAVE_PBGL 2468 MatLUFactorNumeric_MPIAIJ, 2469 #else 2470 0, 2471 #endif 2472 0, 2473 0, 2474 /*30*/ MatSetUpPreallocation_MPIAIJ, 2475 #ifdef PETSC_HAVE_PBGL 2476 MatILUFactorSymbolic_MPIAIJ, 2477 #else 2478 0, 2479 #endif 2480 0, 2481 0, 2482 0, 2483 /*35*/ MatDuplicate_MPIAIJ, 2484 0, 2485 0, 2486 0, 2487 0, 2488 /*40*/ MatAXPY_MPIAIJ, 2489 MatGetSubMatrices_MPIAIJ, 2490 MatIncreaseOverlap_MPIAIJ, 2491 MatGetValues_MPIAIJ, 2492 MatCopy_MPIAIJ, 2493 /*45*/ 0, 2494 MatScale_MPIAIJ, 2495 0, 2496 0, 2497 0, 2498 /*50*/ MatSetBlockSize_MPIAIJ, 2499 0, 2500 0, 2501 0, 2502 0, 2503 /*55*/ MatFDColoringCreate_MPIAIJ, 2504 0, 2505 MatSetUnfactored_MPIAIJ, 2506 MatPermute_MPIAIJ, 2507 0, 2508 /*60*/ MatGetSubMatrix_MPIAIJ, 2509 MatDestroy_MPIAIJ, 2510 MatView_MPIAIJ, 2511 0, 2512 0, 2513 /*65*/ 0, 2514 0, 2515 0, 2516 0, 2517 0, 2518 /*70*/ MatGetRowMaxAbs_MPIAIJ, 2519 0, 2520 MatSetColoring_MPIAIJ, 2521 #if defined(PETSC_HAVE_ADIC) 2522 MatSetValuesAdic_MPIAIJ, 2523 #else 2524 0, 2525 #endif 2526 MatSetValuesAdifor_MPIAIJ, 2527 /*75*/ 0, 2528 0, 2529 0, 2530 0, 2531 0, 2532 /*80*/ 0, 2533 0, 2534 0, 2535 0, 2536 /*84*/ MatLoad_MPIAIJ, 2537 0, 2538 0, 2539 0, 2540 0, 2541 0, 2542 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2543 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2544 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2545 MatPtAP_Basic, 2546 MatPtAPSymbolic_MPIAIJ, 2547 /*95*/ MatPtAPNumeric_MPIAIJ, 2548 0, 2549 0, 2550 0, 2551 0, 2552 /*100*/0, 2553 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2554 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2555 MatConjugate_MPIAIJ, 2556 0, 2557 /*105*/MatSetValuesRow_MPIAIJ, 2558 MatRealPart_MPIAIJ, 2559 MatImaginaryPart_MPIAIJ, 2560 0, 2561 0, 2562 /*110*/0, 2563 MatGetRedundantMatrix_MPIAIJ, 2564 MatGetRowMin_MPIAIJ, 2565 0, 2566 0, 2567 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2568 2569 /* ----------------------------------------------------------------------------------------*/ 2570 2571 EXTERN_C_BEGIN 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2574 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2575 { 2576 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2581 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 EXTERN_C_END 2585 2586 EXTERN_C_BEGIN 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2589 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2590 { 2591 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2592 PetscErrorCode ierr; 2593 2594 PetscFunctionBegin; 2595 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2596 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 EXTERN_C_END 2600 2601 #include "petscpc.h" 2602 EXTERN_C_BEGIN 2603 #undef __FUNCT__ 2604 #define __FUNCT__ 
"MatMPIAIJSetPreallocation_MPIAIJ" 2605 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2606 { 2607 Mat_MPIAIJ *b; 2608 PetscErrorCode ierr; 2609 PetscInt i; 2610 2611 PetscFunctionBegin; 2612 B->preallocated = PETSC_TRUE; 2613 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2614 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2615 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2616 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2617 2618 B->rmap.bs = B->cmap.bs = 1; 2619 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2620 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2621 if (d_nnz) { 2622 for (i=0; i<B->rmap.n; i++) { 2623 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2624 } 2625 } 2626 if (o_nnz) { 2627 for (i=0; i<B->rmap.n; i++) { 2628 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2629 } 2630 } 2631 b = (Mat_MPIAIJ*)B->data; 2632 2633 /* Explicitly create 2 MATSEQAIJ matrices. */ 2634 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2635 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2636 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2637 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2638 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2639 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2640 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2641 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2642 2643 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2644 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2645 2646 PetscFunctionReturn(0); 2647 } 2648 EXTERN_C_END 2649 2650 #undef __FUNCT__ 2651 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2652 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2653 { 2654 Mat mat; 2655 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2656 PetscErrorCode ierr; 2657 2658 PetscFunctionBegin; 2659 *newmat = 0; 2660 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2661 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2662 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2663 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2664 a = (Mat_MPIAIJ*)mat->data; 2665 2666 mat->factor = matin->factor; 2667 mat->rmap.bs = matin->rmap.bs; 2668 mat->assembled = PETSC_TRUE; 2669 mat->insertmode = NOT_SET_VALUES; 2670 mat->preallocated = PETSC_TRUE; 2671 2672 a->size = oldmat->size; 2673 a->rank = oldmat->rank; 2674 a->donotstash = oldmat->donotstash; 2675 a->roworiented = oldmat->roworiented; 2676 a->rowindices = 0; 2677 a->rowvalues = 0; 2678 a->getrowactive = PETSC_FALSE; 2679 2680 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2681 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2682 2683 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2684 if (oldmat->colmap) { 2685 #if defined (PETSC_USE_CTABLE) 2686 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2687 #else 2688 ierr = 
PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2689 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2690 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2691 #endif 2692 } else a->colmap = 0; 2693 if (oldmat->garray) { 2694 PetscInt len; 2695 len = oldmat->B->cmap.n; 2696 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2697 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2698 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2699 } else a->garray = 0; 2700 2701 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2702 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2703 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2704 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2705 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2706 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2707 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2708 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2709 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2710 *newmat = mat; 2711 PetscFunctionReturn(0); 2712 } 2713 2714 #include "petscsys.h" 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatLoad_MPIAIJ" 2718 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2719 { 2720 Mat A; 2721 PetscScalar *vals,*svals; 2722 MPI_Comm comm = ((PetscObject)viewer)->comm; 2723 MPI_Status status; 2724 PetscErrorCode ierr; 2725 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2726 PetscInt i,nz,j,rstart,rend,mmax; 2727 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2728 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2729 PetscInt cend,cstart,n,*rowners; 2730 int fd; 2731 2732 PetscFunctionBegin; 2733 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2734 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2735 if (!rank) { 2736 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2737 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2738 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2739 } 2740 2741 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2742 M = header[1]; N = header[2]; 2743 /* determine ownership of all rows */ 2744 m = M/size + ((M % size) > rank); 2745 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2746 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2747 2748 /* First process needs enough room for process with most rows */ 2749 if (!rank) { 2750 mmax = rowners[1]; 2751 for (i=2; i<size; i++) { 2752 mmax = PetscMax(mmax,rowners[i]); 2753 } 2754 } else mmax = m; 2755 2756 rowners[0] = 0; 2757 for (i=2; i<=size; i++) { 2758 rowners[i] += rowners[i-1]; 2759 } 2760 rstart = rowners[rank]; 2761 rend = rowners[rank+1]; 2762 2763 /* distribute row lengths to all processors */ 2764 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2765 if (!rank) { 2766 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2767 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2768 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2769 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2770 for (j=0; j<m; j++) { 2771 procsnz[0] += 
ourlens[j]; 2772 } 2773 for (i=1; i<size; i++) { 2774 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2775 /* calculate the number of nonzeros on each processor */ 2776 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2777 procsnz[i] += rowlengths[j]; 2778 } 2779 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2780 } 2781 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2782 } else { 2783 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2784 } 2785 2786 if (!rank) { 2787 /* determine max buffer needed and allocate it */ 2788 maxnz = 0; 2789 for (i=0; i<size; i++) { 2790 maxnz = PetscMax(maxnz,procsnz[i]); 2791 } 2792 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2793 2794 /* read in my part of the matrix column indices */ 2795 nz = procsnz[0]; 2796 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2797 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2798 2799 /* read in every one elses and ship off */ 2800 for (i=1; i<size; i++) { 2801 nz = procsnz[i]; 2802 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2803 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2804 } 2805 ierr = PetscFree(cols);CHKERRQ(ierr); 2806 } else { 2807 /* determine buffer space needed for message */ 2808 nz = 0; 2809 for (i=0; i<m; i++) { 2810 nz += ourlens[i]; 2811 } 2812 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2813 2814 /* receive message of column indices*/ 2815 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2816 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2817 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2818 } 2819 2820 /* determine column ownership if matrix is not square */ 2821 if (N != M) { 2822 n = N/size + ((N % size) > rank); 2823 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2824 cstart = cend - n; 2825 } else { 2826 cstart = rstart; 2827 cend = rend; 2828 n = cend - cstart; 2829 } 2830 2831 /* loop over local rows, determining number of off diagonal entries */ 2832 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2833 jj = 0; 2834 for (i=0; i<m; i++) { 2835 for (j=0; j<ourlens[i]; j++) { 2836 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2837 jj++; 2838 } 2839 } 2840 2841 /* create our matrix */ 2842 for (i=0; i<m; i++) { 2843 ourlens[i] -= offlens[i]; 2844 } 2845 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2846 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2847 ierr = MatSetType(A,type);CHKERRQ(ierr); 2848 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2849 2850 for (i=0; i<m; i++) { 2851 ourlens[i] += offlens[i]; 2852 } 2853 2854 if (!rank) { 2855 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2856 2857 /* read in my part of the matrix numerical values */ 2858 nz = procsnz[0]; 2859 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2860 2861 /* insert into matrix */ 2862 jj = rstart; 2863 smycols = mycols; 2864 svals = vals; 2865 for (i=0; i<m; i++) { 2866 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2867 smycols += ourlens[i]; 2868 svals += ourlens[i]; 2869 jj++; 2870 } 2871 2872 /* read in other processors and ship out */ 2873 for (i=1; i<size; i++) { 2874 nz = procsnz[i]; 2875 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2876 ierr = 
MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2877 } 2878 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2879 } else { 2880 /* receive numeric values */ 2881 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2882 2883 /* receive message of values*/ 2884 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2885 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2886 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2887 2888 /* insert into matrix */ 2889 jj = rstart; 2890 smycols = mycols; 2891 svals = vals; 2892 for (i=0; i<m; i++) { 2893 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2894 smycols += ourlens[i]; 2895 svals += ourlens[i]; 2896 jj++; 2897 } 2898 } 2899 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2900 ierr = PetscFree(vals);CHKERRQ(ierr); 2901 ierr = PetscFree(mycols);CHKERRQ(ierr); 2902 ierr = PetscFree(rowners);CHKERRQ(ierr); 2903 2904 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 *newmat = A; 2907 PetscFunctionReturn(0); 2908 } 2909 2910 #undef __FUNCT__ 2911 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2912 /* 2913 Not great since it makes two copies of the submatrix, first an SeqAIJ 2914 in local and then by concatenating the local matrices the end result. 2915 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2916 */ 2917 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2918 { 2919 PetscErrorCode ierr; 2920 PetscMPIInt rank,size; 2921 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2922 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2923 Mat *local,M,Mreuse; 2924 MatScalar *vwork,*aa; 2925 MPI_Comm comm = ((PetscObject)mat)->comm; 2926 Mat_SeqAIJ *aij; 2927 2928 2929 PetscFunctionBegin; 2930 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2931 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2932 2933 if (call == MAT_REUSE_MATRIX) { 2934 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2935 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2936 local = &Mreuse; 2937 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2938 } else { 2939 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2940 Mreuse = *local; 2941 ierr = PetscFree(local);CHKERRQ(ierr); 2942 } 2943 2944 /* 2945 m - number of local rows 2946 n - number of columns (same on all processors) 2947 rstart - first row in new global matrix generated 2948 */ 2949 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2950 if (call == MAT_INITIAL_MATRIX) { 2951 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2952 ii = aij->i; 2953 jj = aij->j; 2954 2955 /* 2956 Determine the number of non-zeros in the diagonal and off-diagonal 2957 portions of the matrix in order to do correct preallocation 2958 */ 2959 2960 /* first get start and end of "diagonal" columns */ 2961 if (csize == PETSC_DECIDE) { 2962 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2963 if (mglobal == n) { /* square matrix */ 2964 nlocal = m; 2965 } else { 2966 nlocal = n/size + ((n % size) > rank); 2967 } 2968 } else { 2969 nlocal = csize; 2970 } 2971 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2972 rstart = rend - nlocal; 2973 if (rank == size - 1 && 
rend != n) { 2974 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2975 } 2976 2977 /* next, compute all the lengths */ 2978 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2979 olens = dlens + m; 2980 for (i=0; i<m; i++) { 2981 jend = ii[i+1] - ii[i]; 2982 olen = 0; 2983 dlen = 0; 2984 for (j=0; j<jend; j++) { 2985 if (*jj < rstart || *jj >= rend) olen++; 2986 else dlen++; 2987 jj++; 2988 } 2989 olens[i] = olen; 2990 dlens[i] = dlen; 2991 } 2992 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2993 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2994 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2995 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2996 ierr = PetscFree(dlens);CHKERRQ(ierr); 2997 } else { 2998 PetscInt ml,nl; 2999 3000 M = *newmat; 3001 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3002 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3003 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3004 /* 3005 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3006 rather than the slower MatSetValues(). 3007 */ 3008 M->was_assembled = PETSC_TRUE; 3009 M->assembled = PETSC_FALSE; 3010 } 3011 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3012 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3013 ii = aij->i; 3014 jj = aij->j; 3015 aa = aij->a; 3016 for (i=0; i<m; i++) { 3017 row = rstart + i; 3018 nz = ii[i+1] - ii[i]; 3019 cwork = jj; jj += nz; 3020 vwork = aa; aa += nz; 3021 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3022 } 3023 3024 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3026 *newmat = M; 3027 3028 /* save submatrix used in processor for next request */ 3029 if (call == MAT_INITIAL_MATRIX) { 3030 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3031 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3032 } 3033 3034 PetscFunctionReturn(0); 3035 } 3036 3037 EXTERN_C_BEGIN 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3040 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3041 { 3042 PetscInt m,cstart, cend,j,nnz,i,d; 3043 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3044 const PetscInt *JJ; 3045 PetscScalar *values; 3046 PetscErrorCode ierr; 3047 3048 PetscFunctionBegin; 3049 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3050 3051 B->rmap.bs = B->cmap.bs = 1; 3052 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 3053 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 3054 m = B->rmap.n; 3055 cstart = B->cmap.rstart; 3056 cend = B->cmap.rend; 3057 rstart = B->rmap.rstart; 3058 3059 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3060 o_nnz = d_nnz + m; 3061 3062 #if defined(PETSC_USE_DEBUG) 3063 for (i=0; i<m; i++) { 3064 nnz = Ii[i+1]- Ii[i]; 3065 JJ = J + Ii[i]; 3066 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3067 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3068 if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N); 3069
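    /* the column indices within each row must be strictly increasing */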
for (j=1; j<nnz; j++) { 3070 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3071 } 3072 } 3073 #endif 3074 3075 for (i=0; i<m; i++) { 3076 nnz = Ii[i+1]- Ii[i]; 3077 JJ = J + Ii[i]; 3078 nnz_max = PetscMax(nnz_max,nnz); 3079 for (j=0; j<nnz; j++) { 3080 if (*JJ >= cstart) break; 3081 JJ++; 3082 } 3083 d = 0; 3084 for (; j<nnz; j++) { 3085 if (*JJ++ >= cend) break; 3086 d++; 3087 } 3088 d_nnz[i] = d; 3089 o_nnz[i] = nnz - d; 3090 } 3091 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3092 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3093 3094 if (v) values = (PetscScalar*)v; 3095 else { 3096 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3097 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3098 } 3099 3100 for (i=0; i<m; i++) { 3101 ii = i + rstart; 3102 nnz = Ii[i+1]- Ii[i]; 3103 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3104 } 3105 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3106 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3107 3108 if (!v) { 3109 ierr = PetscFree(values);CHKERRQ(ierr); 3110 } 3111 PetscFunctionReturn(0); 3112 } 3113 EXTERN_C_END 3114 3115 #undef __FUNCT__ 3116 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3117 /*@ 3118 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3119 (the default parallel PETSc format). 3120 3121 Collective on MPI_Comm 3122 3123 Input Parameters: 3124 + B - the matrix 3125 . i - the indices into j for the start of each local row (starts with zero) 3126 . j - the column indices for each local row (starts with zero); these must be sorted for each row 3127 - v - optional values in the matrix 3128 3129 Level: developer 3130 3131 Notes: 3132 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3133 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3134 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3135 3136 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3137 3138 The format used for the sparse matrix input is equivalent to a 3139 row-major ordering, i.e., for the following matrix, the input data expected is 3140 as shown: 3141 3142 1 0 0 3143 2 0 3 P0 3144 ------- 3145 4 5 6 P1 3146 3147 Process0 [P0]: rows_owned=[0,1] 3148 i = {0,1,3} [size = nrow+1 = 2+1] 3149 j = {0,0,2} [size = nz = 3] 3150 v = {1,2,3} [size = nz = 3] 3151 3152 Process1 [P1]: rows_owned=[2] 3153 i = {0,3} [size = nrow+1 = 1+1] 3154 j = {0,1,2} [size = nz = 3] 3155 v = {4,5,6} [size = nz = 3] 3156 3157 The column indices for each row MUST be sorted.
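   As an illustration, the calls below (a sketch only, with the arrays written out
   literally and error checking omitted) build the matrix above from process 0 of a
   two-process run; process 1 would pass its own arrays i = {0,3}, j = {0,1,2},
   v = {4,5,6} and one local row:

.vb
      Mat         B;
      PetscInt    i[] = {0,1,3};              /* row pointers for the 2 local rows */
      PetscInt    j[] = {0,0,2};              /* global column indices, sorted     */
      PetscScalar v[] = {1.0,2.0,3.0};

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);      /* 2 of the 3 global rows are local  */
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);  /* preallocates, inserts and assembles */
.ve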
3158 3159 .keywords: matrix, aij, compressed row, sparse, parallel 3160 3161 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3162 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3163 @*/ 3164 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3165 { 3166 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3167 3168 PetscFunctionBegin; 3169 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3170 if (f) { 3171 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3172 } 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3178 /*@C 3179 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3180 (the default parallel PETSc format). For good matrix assembly performance 3181 the user should preallocate the matrix storage by setting the parameters 3182 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3183 performance can be increased by more than a factor of 50. 3184 3185 Collective on MPI_Comm 3186 3187 Input Parameters: 3188 + A - the matrix 3189 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3190 (same value is used for all local rows) 3191 . d_nnz - array containing the number of nonzeros in the various rows of the 3192 DIAGONAL portion of the local submatrix (possibly different for each row) 3193 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3194 The size of this array is equal to the number of local rows, i.e 'm'. 3195 You must leave room for the diagonal entry even if it is zero. 3196 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3197 submatrix (same value is used for all local rows). 3198 - o_nnz - array containing the number of nonzeros in the various rows of the 3199 OFF-DIAGONAL portion of the local submatrix (possibly different for 3200 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3201 structure. The size of this array is equal to the number 3202 of local rows, i.e 'm'. 3203 3204 If the *_nnz parameter is given then the *_nz parameter is ignored 3205 3206 The AIJ format (also called the Yale sparse matrix format or 3207 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3208 storage. The stored row and column indices begin with zero. See the users manual for details. 3209 3210 The parallel matrix is partitioned such that the first m0 rows belong to 3211 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3212 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3213 3214 The DIAGONAL portion of the local submatrix of a processor can be defined 3215 as the submatrix which is obtained by extraction the part corresponding 3216 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3217 first row that belongs to the processor, and r2 is the last row belonging 3218 to the this processor. This is a square mxm matrix. The remaining portion 3219 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3220 3221 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
3222 3223 You can call MatGetInfo() to get information on how effective the preallocation was; 3224 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3225 You can also run with the option -info and look for messages with the string 3226 malloc in them to see if additional memory allocation was needed. 3227 3228 Example usage: 3229 3230 Consider the following 8x8 matrix with 34 non-zero values, that is 3231 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3232 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3233 as follows: 3234 3235 .vb 3236 1 2 0 | 0 3 0 | 0 4 3237 Proc0 0 5 6 | 7 0 0 | 8 0 3238 9 0 10 | 11 0 0 | 12 0 3239 ------------------------------------- 3240 13 0 14 | 15 16 17 | 0 0 3241 Proc1 0 18 0 | 19 20 21 | 0 0 3242 0 0 0 | 22 23 0 | 24 0 3243 ------------------------------------- 3244 Proc2 25 26 27 | 0 0 28 | 29 0 3245 30 0 0 | 31 32 33 | 0 34 3246 .ve 3247 3248 This can be represented as a collection of submatrices as: 3249 3250 .vb 3251 A B C 3252 D E F 3253 G H I 3254 .ve 3255 3256 Where the submatrices A,B,C are owned by proc0, D,E,F are 3257 owned by proc1, G,H,I are owned by proc2. 3258 3259 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3260 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3261 The 'M','N' parameters are 8,8, and have the same values on all procs. 3262 3263 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3264 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3265 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3266 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3267 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3268 matrix, ans [DF] as another SeqAIJ matrix. 3269 3270 When d_nz, o_nz parameters are specified, d_nz storage elements are 3271 allocated for every row of the local diagonal submatrix, and o_nz 3272 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3273 One way to choose d_nz and o_nz is to use the max nonzerors per local 3274 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3275 In this case, the values of d_nz,o_nz are: 3276 .vb 3277 proc0 : dnz = 2, o_nz = 2 3278 proc1 : dnz = 3, o_nz = 2 3279 proc2 : dnz = 1, o_nz = 4 3280 .ve 3281 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3282 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3283 for proc3. i.e we are using 12+15+10=37 storage locations to store 3284 34 values. 3285 3286 When d_nnz, o_nnz parameters are specified, the storage is specified 3287 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3288 In the above case the values for d_nnz,o_nnz are: 3289 .vb 3290 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3291 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3292 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3293 .ve 3294 Here the space allocated is sum of all the above values i.e 34, and 3295 hence pre-allocation is perfect. 
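   For this example, one possible preallocation call on process 0, using the exact
   per-row counts listed above, is the following sketch (the communicator comm is
   assumed to exist and error checking is omitted):
.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

   MatCreate(comm,&A);
   MatSetSizes(A,3,3,8,8);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   Calling MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL) instead gives the
   constant d_nz/o_nz variant described above.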
3296 3297 Level: intermediate 3298 3299 .keywords: matrix, aij, compressed row, sparse, parallel 3300 3301 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3302 MPIAIJ, MatGetInfo() 3303 @*/ 3304 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3305 { 3306 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3307 3308 PetscFunctionBegin; 3309 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3310 if (f) { 3311 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3318 /*@ 3319 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3320 CSR format the local rows. 3321 3322 Collective on MPI_Comm 3323 3324 Input Parameters: 3325 + comm - MPI communicator 3326 . m - number of local rows (Cannot be PETSC_DECIDE) 3327 . n - This value should be the same as the local size used in creating the 3328 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3329 calculated if N is given) For square matrices n is almost always m. 3330 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3331 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3332 . i - row indices 3333 . j - column indices 3334 - a - matrix values 3335 3336 Output Parameter: 3337 . mat - the matrix 3338 3339 Level: intermediate 3340 3341 Notes: 3342 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3343 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3344 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3345 3346 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3347 3348 The format which is used for the sparse matrix input, is equivalent to a 3349 row-major ordering.. 
i.e for the following matrix, the input data expected is 3350 as shown: 3351 3352 1 0 0 3353 2 0 3 P0 3354 ------- 3355 4 5 6 P1 3356 3357 Process0 [P0]: rows_owned=[0,1] 3358 i = {0,1,3} [size = nrow+1 = 2+1] 3359 j = {0,0,2} [size = nz = 6] 3360 v = {1,2,3} [size = nz = 6] 3361 3362 Process1 [P1]: rows_owned=[2] 3363 i = {0,3} [size = nrow+1 = 1+1] 3364 j = {0,1,2} [size = nz = 6] 3365 v = {4,5,6} [size = nz = 6] 3366 3367 .keywords: matrix, aij, compressed row, sparse, parallel 3368 3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3370 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3371 @*/ 3372 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3373 { 3374 PetscErrorCode ierr; 3375 3376 PetscFunctionBegin; 3377 if (i[0]) { 3378 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3379 } 3380 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3381 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3382 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3383 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3384 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3385 PetscFunctionReturn(0); 3386 } 3387 3388 #undef __FUNCT__ 3389 #define __FUNCT__ "MatCreateMPIAIJ" 3390 /*@C 3391 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3392 (the default parallel PETSc format). For good matrix assembly performance 3393 the user should preallocate the matrix storage by setting the parameters 3394 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3395 performance can be increased by more than a factor of 50. 3396 3397 Collective on MPI_Comm 3398 3399 Input Parameters: 3400 + comm - MPI communicator 3401 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3402 This value should be the same as the local size used in creating the 3403 y vector for the matrix-vector product y = Ax. 3404 . n - This value should be the same as the local size used in creating the 3405 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3406 calculated if N is given) For square matrices n is almost always m. 3407 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3408 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3409 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3410 (same value is used for all local rows) 3411 . d_nnz - array containing the number of nonzeros in the various rows of the 3412 DIAGONAL portion of the local submatrix (possibly different for each row) 3413 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3414 The size of this array is equal to the number of local rows, i.e 'm'. 3415 You must leave room for the diagonal entry even if it is zero. 3416 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3417 submatrix (same value is used for all local rows). 3418 - o_nnz - array containing the number of nonzeros in the various rows of the 3419 OFF-DIAGONAL portion of the local submatrix (possibly different for 3420 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3421 structure. The size of this array is equal to the number 3422 of local rows, i.e 'm'. 
3423 3424 Output Parameter: 3425 . A - the matrix 3426 3427 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3428 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3429 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3430 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3431 3432 Notes: 3433 If the *_nnz parameter is given then the *_nz parameter is ignored 3434 3435 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3436 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3437 storage requirements for this matrix. 3438 3439 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3440 processor than it must be used on all processors that share the object for 3441 that argument. 3442 3443 The user MUST specify either the local or global matrix dimensions 3444 (possibly both). 3445 3446 The parallel matrix is partitioned across processors such that the 3447 first m0 rows belong to process 0, the next m1 rows belong to 3448 process 1, the next m2 rows belong to process 2 etc.. where 3449 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3450 values corresponding to [m x N] submatrix. 3451 3452 The columns are logically partitioned with the n0 columns belonging 3453 to 0th partition, the next n1 columns belonging to the next 3454 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3455 3456 The DIAGONAL portion of the local submatrix on any given processor 3457 is the submatrix corresponding to the rows and columns m,n 3458 corresponding to the given processor. i.e diagonal matrix on 3459 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3460 etc. The remaining portion of the local submatrix [m x (N-n)] 3461 constitute the OFF-DIAGONAL portion. The example below better 3462 illustrates this concept. 3463 3464 For a square global matrix we define each processor's diagonal portion 3465 to be its local rows and the corresponding columns (a square submatrix); 3466 each processor's off-diagonal portion encompasses the remainder of the 3467 local matrix (a rectangular submatrix). 3468 3469 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3470 3471 When calling this routine with a single process communicator, a matrix of 3472 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3473 type of communicator, use the construction mechanism: 3474 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3475 3476 By default, this format uses inodes (identical nodes) when possible. 3477 We search for consecutive rows with the same nonzero structure, thereby 3478 reusing matrix information to achieve increased efficiency. 3479 3480 Options Database Keys: 3481 + -mat_no_inode - Do not use inodes 3482 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3483 - -mat_aij_oneindex - Internally use indexing starting at 1 3484 rather than 0. Note that when calling MatSetValues(), 3485 the user still MUST index entries starting at 0! 3486 3487 3488 Example usage: 3489 3490 Consider the following 8x8 matrix with 34 non-zero values, that is 3491 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3492 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3493 as follows: 3494 3495 .vb 3496 1 2 0 | 0 3 0 | 0 4 3497 Proc0 0 5 6 | 7 0 0 | 8 0 3498 9 0 10 | 11 0 0 | 12 0 3499 ------------------------------------- 3500 13 0 14 | 15 16 17 | 0 0 3501 Proc1 0 18 0 | 19 20 21 | 0 0 3502 0 0 0 | 22 23 0 | 24 0 3503 ------------------------------------- 3504 Proc2 25 26 27 | 0 0 28 | 29 0 3505 30 0 0 | 31 32 33 | 0 34 3506 .ve 3507 3508 This can be represented as a collection of submatrices as: 3509 3510 .vb 3511 A B C 3512 D E F 3513 G H I 3514 .ve 3515 3516 Where the submatrices A,B,C are owned by proc0, D,E,F are 3517 owned by proc1, G,H,I are owned by proc2. 3518 3519 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3520 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3521 The 'M','N' parameters are 8,8, and have the same values on all procs. 3522 3523 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3524 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3525 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3526 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3527 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3528 matrix, ans [DF] as another SeqAIJ matrix. 3529 3530 When d_nz, o_nz parameters are specified, d_nz storage elements are 3531 allocated for every row of the local diagonal submatrix, and o_nz 3532 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3533 One way to choose d_nz and o_nz is to use the max nonzerors per local 3534 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3535 In this case, the values of d_nz,o_nz are: 3536 .vb 3537 proc0 : dnz = 2, o_nz = 2 3538 proc1 : dnz = 3, o_nz = 2 3539 proc2 : dnz = 1, o_nz = 4 3540 .ve 3541 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3542 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3543 for proc3. i.e we are using 12+15+10=37 storage locations to store 3544 34 values. 3545 3546 When d_nnz, o_nnz parameters are specified, the storage is specified 3547 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3548 In the above case the values for d_nnz,o_nnz are: 3549 .vb 3550 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3551 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3552 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3553 .ve 3554 Here the space allocated is sum of all the above values i.e 34, and 3555 hence pre-allocation is perfect. 
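   For this example, the corresponding single call on process 0 is the following sketch
   (the communicator comm is assumed to exist and error checking is omitted):
.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

   MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   after which values are inserted with MatSetValues() and the matrix is assembled with
   MatAssemblyBegin()/MatAssemblyEnd() as usual.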
3556 3557 Level: intermediate 3558 3559 .keywords: matrix, aij, compressed row, sparse, parallel 3560 3561 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3562 MPIAIJ, MatCreateMPIAIJWithArrays() 3563 @*/ 3564 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3565 { 3566 PetscErrorCode ierr; 3567 PetscMPIInt size; 3568 3569 PetscFunctionBegin; 3570 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3571 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3572 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3573 if (size > 1) { 3574 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3575 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3576 } else { 3577 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3578 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3579 } 3580 PetscFunctionReturn(0); 3581 } 3582 3583 #undef __FUNCT__ 3584 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3585 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3586 { 3587 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3588 3589 PetscFunctionBegin; 3590 *Ad = a->A; 3591 *Ao = a->B; 3592 *colmap = a->garray; 3593 PetscFunctionReturn(0); 3594 } 3595 3596 #undef __FUNCT__ 3597 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3598 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3599 { 3600 PetscErrorCode ierr; 3601 PetscInt i; 3602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3603 3604 PetscFunctionBegin; 3605 if (coloring->ctype == IS_COLORING_GLOBAL) { 3606 ISColoringValue *allcolors,*colors; 3607 ISColoring ocoloring; 3608 3609 /* set coloring for diagonal portion */ 3610 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3611 3612 /* set coloring for off-diagonal portion */ 3613 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3614 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3615 for (i=0; i<a->B->cmap.n; i++) { 3616 colors[i] = allcolors[a->garray[i]]; 3617 } 3618 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3619 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3620 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3621 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3622 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3623 ISColoringValue *colors; 3624 PetscInt *larray; 3625 ISColoring ocoloring; 3626 3627 /* set coloring for diagonal portion */ 3628 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3629 for (i=0; i<a->A->cmap.n; i++) { 3630 larray[i] = i + A->cmap.rstart; 3631 } 3632 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3633 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3634 for (i=0; i<a->A->cmap.n; i++) { 3635 colors[i] = coloring->colors[larray[i]]; 3636 } 3637 ierr = PetscFree(larray);CHKERRQ(ierr); 3638 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3639 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3640 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3641 3642 /* set coloring for off-diagonal portion */ 3643 ierr = 
PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3644 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3645 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3646 for (i=0; i<a->B->cmap.n; i++) { 3647 colors[i] = coloring->colors[larray[i]]; 3648 } 3649 ierr = PetscFree(larray);CHKERRQ(ierr); 3650 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3651 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3652 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3653 } else { 3654 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3655 } 3656 3657 PetscFunctionReturn(0); 3658 } 3659 3660 #if defined(PETSC_HAVE_ADIC) 3661 #undef __FUNCT__ 3662 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3663 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3664 { 3665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3666 PetscErrorCode ierr; 3667 3668 PetscFunctionBegin; 3669 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3670 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3671 PetscFunctionReturn(0); 3672 } 3673 #endif 3674 3675 #undef __FUNCT__ 3676 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3677 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3678 { 3679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3680 PetscErrorCode ierr; 3681 3682 PetscFunctionBegin; 3683 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3684 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3685 PetscFunctionReturn(0); 3686 } 3687 3688 #undef __FUNCT__ 3689 #define __FUNCT__ "MatMerge" 3690 /*@ 3691 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3692 matrices from each processor 3693 3694 Collective on MPI_Comm 3695 3696 Input Parameters: 3697 + comm - the communicators the parallel matrix will live on 3698 . inmat - the input sequential matrices 3699 . n - number of local columns (or PETSC_DECIDE) 3700 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3701 3702 Output Parameter: 3703 . outmat - the parallel matrix generated 3704 3705 Level: advanced 3706 3707 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
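   Note that the input matrix is destroyed inside this routine (see the MatDestroy() call in
   the implementation below), so inmat should not be used again after the call.

   A typical call is the following sketch, where each process holds a sequential matrix seqA
   with the same number of columns (error checking omitted):
.vb
   Mat parA;
   MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve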
3708 3709 @*/ 3710 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3711 { 3712 PetscErrorCode ierr; 3713 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3714 PetscInt *indx; 3715 PetscScalar *values; 3716 3717 PetscFunctionBegin; 3718 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3719 if (scall == MAT_INITIAL_MATRIX){ 3720 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3721 if (n == PETSC_DECIDE){ 3722 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3723 } 3724 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3725 rstart -= m; 3726 3727 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3728 for (i=0;i<m;i++) { 3729 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3730 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3731 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3732 } 3733 /* This routine will ONLY return MPIAIJ type matrix */ 3734 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3735 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3736 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3737 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3738 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3739 3740 } else if (scall == MAT_REUSE_MATRIX){ 3741 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3742 } else { 3743 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3744 } 3745 3746 for (i=0;i<m;i++) { 3747 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3748 Ii = i + rstart; 3749 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3750 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3751 } 3752 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3753 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatFileSplit" 3761 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3762 { 3763 PetscErrorCode ierr; 3764 PetscMPIInt rank; 3765 PetscInt m,N,i,rstart,nnz; 3766 size_t len; 3767 const PetscInt *indx; 3768 PetscViewer out; 3769 char *name; 3770 Mat B; 3771 const PetscScalar *values; 3772 3773 PetscFunctionBegin; 3774 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3775 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3776 /* Should this be the type of the diagonal block of A? 
*/ 3777 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3778 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3779 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3780 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3781 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3782 for (i=0;i<m;i++) { 3783 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3784 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3785 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3786 } 3787 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3788 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3789 3790 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3791 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3792 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3793 sprintf(name,"%s.%d",outfile,rank); 3794 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3795 ierr = PetscFree(name); 3796 ierr = MatView(B,out);CHKERRQ(ierr); 3797 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3798 ierr = MatDestroy(B);CHKERRQ(ierr); 3799 PetscFunctionReturn(0); 3800 } 3801 3802 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3803 #undef __FUNCT__ 3804 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3805 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3806 { 3807 PetscErrorCode ierr; 3808 Mat_Merge_SeqsToMPI *merge; 3809 PetscContainer container; 3810 3811 PetscFunctionBegin; 3812 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3813 if (container) { 3814 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3815 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3816 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3817 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3818 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3819 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3820 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3821 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3822 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3823 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3824 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3825 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3826 3827 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3828 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3829 } 3830 ierr = PetscFree(merge);CHKERRQ(ierr); 3831 3832 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #include "src/mat/utils/freespace.h" 3837 #include "petscbt.h" 3838 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3841 /*@C 3842 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3843 matrices from each processor 3844 3845 Collective on MPI_Comm 3846 3847 Input Parameters: 3848 + comm - the communicators the parallel matrix will live on 3849 . seqmat - the input sequential matrices 3850 . m - number of local rows (or PETSC_DECIDE) 3851 . n - number of local columns (or PETSC_DECIDE) 3852 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3853 3854 Output Parameter: 3855 . mpimat - the parallel matrix generated 3856 3857 Level: advanced 3858 3859 Notes: 3860 The dimensions of the sequential matrix in each processor MUST be the same. 3861 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3862 destroyed when mpimat is destroyed. 
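   A typical use is the following sketch, in which every process contributes a sequential
   matrix seqA of identical dimensions (error checking omitted):
.vb
   Mat mpiA;
   MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve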
Call PetscObjectQuery() to access seqmat. 3863 @*/ 3864 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3865 { 3866 PetscErrorCode ierr; 3867 MPI_Comm comm=((PetscObject)mpimat)->comm; 3868 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3869 PetscMPIInt size,rank,taga,*len_s; 3870 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3871 PetscInt proc,m; 3872 PetscInt **buf_ri,**buf_rj; 3873 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3874 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3875 MPI_Request *s_waits,*r_waits; 3876 MPI_Status *status; 3877 MatScalar *aa=a->a; 3878 MatScalar **abuf_r,*ba_i; 3879 Mat_Merge_SeqsToMPI *merge; 3880 PetscContainer container; 3881 3882 PetscFunctionBegin; 3883 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3884 3885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3887 3888 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3889 if (container) { 3890 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3891 } 3892 bi = merge->bi; 3893 bj = merge->bj; 3894 buf_ri = merge->buf_ri; 3895 buf_rj = merge->buf_rj; 3896 3897 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3898 owners = merge->rowmap.range; 3899 len_s = merge->len_s; 3900 3901 /* send and recv matrix values */ 3902 /*-----------------------------*/ 3903 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3904 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3905 3906 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3907 for (proc=0,k=0; proc<size; proc++){ 3908 if (!len_s[proc]) continue; 3909 i = owners[proc]; 3910 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3911 k++; 3912 } 3913 3914 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3915 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3916 ierr = PetscFree(status);CHKERRQ(ierr); 3917 3918 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3919 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3920 3921 /* insert mat values of mpimat */ 3922 /*----------------------------*/ 3923 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 3924 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3925 nextrow = buf_ri_k + merge->nrecv; 3926 nextai = nextrow + merge->nrecv; 3927 3928 for (k=0; k<merge->nrecv; k++){ 3929 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3930 nrows = *(buf_ri_k[k]); 3931 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3932 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3933 } 3934 3935 /* set values of ba */ 3936 m = merge->rowmap.n; 3937 for (i=0; i<m; i++) { 3938 arow = owners[rank] + i; 3939 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3940 bnzi = bi[i+1] - bi[i]; 3941 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3942 3943 /* add local non-zero vals of this proc's seqmat into ba */ 3944 anzi = ai[arow+1] - ai[arow]; 3945 aj = a->j + ai[arow]; 3946 aa = a->a + ai[arow]; 3947 nextaj = 0; 3948 for (j=0; nextaj<anzi; j++){ 3949 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3950 ba_i[j] += aa[nextaj++]; 3951 } 3952 } 3953 3954 /* add 
received vals into ba */ 3955 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3956 /* i-th row */ 3957 if (i == *nextrow[k]) { 3958 anzi = *(nextai[k]+1) - *nextai[k]; 3959 aj = buf_rj[k] + *(nextai[k]); 3960 aa = abuf_r[k] + *(nextai[k]); 3961 nextaj = 0; 3962 for (j=0; nextaj<anzi; j++){ 3963 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3964 ba_i[j] += aa[nextaj++]; 3965 } 3966 } 3967 nextrow[k]++; nextai[k]++; 3968 } 3969 } 3970 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3971 } 3972 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3973 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3974 3975 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3976 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3977 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3978 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3979 PetscFunctionReturn(0); 3980 } 3981 3982 #undef __FUNCT__ 3983 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3984 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3985 { 3986 PetscErrorCode ierr; 3987 Mat B_mpi; 3988 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3989 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3990 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3991 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3992 PetscInt len,proc,*dnz,*onz; 3993 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3994 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3995 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3996 MPI_Status *status; 3997 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3998 PetscBT lnkbt; 3999 Mat_Merge_SeqsToMPI *merge; 4000 PetscContainer container; 4001 4002 PetscFunctionBegin; 4003 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4004 4005 /* make sure it is a PETSc comm */ 4006 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4007 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4008 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4009 4010 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4011 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4012 4013 /* determine row ownership */ 4014 /*---------------------------------------------------------*/ 4015 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4016 merge->rowmap.n = m; 4017 merge->rowmap.N = M; 4018 merge->rowmap.bs = 1; 4019 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4020 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4021 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4022 4023 m = merge->rowmap.n; 4024 M = merge->rowmap.N; 4025 owners = merge->rowmap.range; 4026 4027 /* determine the number of messages to send, their lengths */ 4028 /*---------------------------------------------------------*/ 4029 len_s = merge->len_s; 4030 4031 len = 0; /* length of buf_si[] */ 4032 merge->nsend = 0; 4033 for (proc=0; proc<size; proc++){ 4034 len_si[proc] = 0; 4035 if (proc == rank){ 4036 len_s[proc] = 0; 4037 } else { 4038 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4039 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4040 } 4041 if (len_s[proc]) { 4042 merge->nsend++; 4043 nrows = 0; 4044 for (i=owners[proc]; i<owners[proc+1]; i++){ 4045 if (ai[i+1] > ai[i]) nrows++; 4046 } 4047 len_si[proc] = 2*(nrows+1); 4048 len += 
len_si[proc]; 4049 } 4050 } 4051 4052 /* determine the number and length of messages to receive for ij-structure */ 4053 /*-------------------------------------------------------------------------*/ 4054 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4055 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4056 4057 /* post the Irecv of j-structure */ 4058 /*-------------------------------*/ 4059 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4060 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4061 4062 /* post the Isend of j-structure */ 4063 /*--------------------------------*/ 4064 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4065 sj_waits = si_waits + merge->nsend; 4066 4067 for (proc=0, k=0; proc<size; proc++){ 4068 if (!len_s[proc]) continue; 4069 i = owners[proc]; 4070 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4071 k++; 4072 } 4073 4074 /* receives and sends of j-structure are complete */ 4075 /*------------------------------------------------*/ 4076 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4077 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4078 4079 /* send and recv i-structure */ 4080 /*---------------------------*/ 4081 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4082 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4083 4084 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4085 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4086 for (proc=0,k=0; proc<size; proc++){ 4087 if (!len_s[proc]) continue; 4088 /* form outgoing message for i-structure: 4089 buf_si[0]: nrows to be sent 4090 [1:nrows]: row index (global) 4091 [nrows+1:2*nrows+1]: i-structure index 4092 */ 4093 /*-------------------------------------------*/ 4094 nrows = len_si[proc]/2 - 1; 4095 buf_si_i = buf_si + nrows+1; 4096 buf_si[0] = nrows; 4097 buf_si_i[0] = 0; 4098 nrows = 0; 4099 for (i=owners[proc]; i<owners[proc+1]; i++){ 4100 anzi = ai[i+1] - ai[i]; 4101 if (anzi) { 4102 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4103 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4104 nrows++; 4105 } 4106 } 4107 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4108 k++; 4109 buf_si += len_si[proc]; 4110 } 4111 4112 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4113 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4114 4115 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4116 for (i=0; i<merge->nrecv; i++){ 4117 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4118 } 4119 4120 ierr = PetscFree(len_si);CHKERRQ(ierr); 4121 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4122 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4123 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4124 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4125 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4126 ierr = PetscFree(status);CHKERRQ(ierr); 4127 4128 /* compute a local seq matrix in each processor */ 4129 /*----------------------------------------------*/ 4130 /* allocate bi array and 
free space for accumulating nonzero column info */ 4131 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4132 bi[0] = 0; 4133 4134 /* create and initialize a linked list */ 4135 nlnk = N+1; 4136 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4137 4138 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4139 len = 0; 4140 len = ai[owners[rank+1]] - ai[owners[rank]]; 4141 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4142 current_space = free_space; 4143 4144 /* determine symbolic info for each local row */ 4145 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4146 nextrow = buf_ri_k + merge->nrecv; 4147 nextai = nextrow + merge->nrecv; 4148 for (k=0; k<merge->nrecv; k++){ 4149 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4150 nrows = *buf_ri_k[k]; 4151 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4152 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4153 } 4154 4155 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4156 len = 0; 4157 for (i=0;i<m;i++) { 4158 bnzi = 0; 4159 /* add local non-zero cols of this proc's seqmat into lnk */ 4160 arow = owners[rank] + i; 4161 anzi = ai[arow+1] - ai[arow]; 4162 aj = a->j + ai[arow]; 4163 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4164 bnzi += nlnk; 4165 /* add received col data into lnk */ 4166 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4167 if (i == *nextrow[k]) { /* i-th row */ 4168 anzi = *(nextai[k]+1) - *nextai[k]; 4169 aj = buf_rj[k] + *nextai[k]; 4170 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4171 bnzi += nlnk; 4172 nextrow[k]++; nextai[k]++; 4173 } 4174 } 4175 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4176 4177 /* if free space is not available, make more free space */ 4178 if (current_space->local_remaining<bnzi) { 4179 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4180 nspacedouble++; 4181 } 4182 /* copy data into free space, then initialize lnk */ 4183 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4184 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4185 4186 current_space->array += bnzi; 4187 current_space->local_used += bnzi; 4188 current_space->local_remaining -= bnzi; 4189 4190 bi[i+1] = bi[i] + bnzi; 4191 } 4192 4193 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4194 4195 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4196 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4197 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4198 4199 /* create symbolic parallel matrix B_mpi */ 4200 /*---------------------------------------*/ 4201 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4202 if (n==PETSC_DECIDE) { 4203 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4204 } else { 4205 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4206 } 4207 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4208 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4209 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4210 4211 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4212 B_mpi->assembled = PETSC_FALSE; 4213 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4214 merge->bi = bi; 4215 merge->bj = bj; 4216 merge->buf_ri = buf_ri; 4217 merge->buf_rj = 
buf_rj; 4218 merge->coi = PETSC_NULL; 4219 merge->coj = PETSC_NULL; 4220 merge->owners_co = PETSC_NULL; 4221 4222 /* attach the supporting struct to B_mpi for reuse */ 4223 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4224 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4225 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4226 *mpimat = B_mpi; 4227 4228 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4229 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4230 PetscFunctionReturn(0); 4231 } 4232 4233 #undef __FUNCT__ 4234 #define __FUNCT__ "MatMerge_SeqsToMPI" 4235 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4236 { 4237 PetscErrorCode ierr; 4238 4239 PetscFunctionBegin; 4240 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4241 if (scall == MAT_INITIAL_MATRIX){ 4242 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4243 } 4244 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4245 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4246 PetscFunctionReturn(0); 4247 } 4248 4249 #undef __FUNCT__ 4250 #define __FUNCT__ "MatGetLocalMat" 4251 /*@ 4252 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4253 4254 Not Collective 4255 4256 Input Parameters: 4257 + A - the matrix 4258 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4259 4260 Output Parameter: 4261 . A_loc - the local sequential matrix generated 4262 4263 Level: developer 4264 4265 @*/ 4266 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4267 { 4268 PetscErrorCode ierr; 4269 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4270 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4271 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4272 MatScalar *aa=a->a,*ba=b->a,*cam; 4273 PetscScalar *ca; 4274 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 4275 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4276 4277 PetscFunctionBegin; 4278 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4279 if (scall == MAT_INITIAL_MATRIX){ 4280 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4281 ci[0] = 0; 4282 for (i=0; i<am; i++){ 4283 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4284 } 4285 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4286 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4287 k = 0; 4288 for (i=0; i<am; i++) { 4289 ncols_o = bi[i+1] - bi[i]; 4290 ncols_d = ai[i+1] - ai[i]; 4291 /* off-diagonal portion of A */ 4292 for (jo=0; jo<ncols_o; jo++) { 4293 col = cmap[*bj]; 4294 if (col >= cstart) break; 4295 cj[k] = col; bj++; 4296 ca[k++] = *ba++; 4297 } 4298 /* diagonal portion of A */ 4299 for (j=0; j<ncols_d; j++) { 4300 cj[k] = cstart + *aj++; 4301 ca[k++] = *aa++; 4302 } 4303 /* off-diagonal portion of A */ 4304 for (j=jo; j<ncols_o; j++) { 4305 cj[k] = cmap[*bj++]; 4306 ca[k++] = *ba++; 4307 } 4308 } 4309 /* put together the new matrix */ 4310 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4311 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4312 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4313 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4314 mat->free_a = PETSC_TRUE; 4315 mat->free_ij = PETSC_TRUE; 4316 mat->nonew = 0; 4317 } else if (scall == MAT_REUSE_MATRIX){ 4318 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4319 ci = mat->i; cj = mat->j; cam = mat->a; 4320 for (i=0; i<am; i++) { 4321 /* off-diagonal portion of A */ 4322 ncols_o = bi[i+1] - bi[i]; 4323 for (jo=0; jo<ncols_o; jo++) { 4324 col = cmap[*bj]; 4325 if (col >= cstart) break; 4326 *cam++ = *ba++; bj++; 4327 } 4328 /* diagonal portion of A */ 4329 ncols_d = ai[i+1] - ai[i]; 4330 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4331 /* off-diagonal portion of A */ 4332 for (j=jo; j<ncols_o; j++) { 4333 *cam++ = *ba++; bj++; 4334 } 4335 } 4336 } else { 4337 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4338 } 4339 4340 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4341 PetscFunctionReturn(0); 4342 } 4343 4344 #undef __FUNCT__ 4345 #define __FUNCT__ "MatGetLocalMatCondensed" 4346 /*@C 4347 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4348 4349 Not Collective 4350 4351 Input Parameters: 4352 + A - the matrix 4353 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4354 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4355 4356 Output Parameter: 4357 . A_loc - the local sequential matrix generated 4358 4359 Level: developer 4360 4361 @*/ 4362 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4363 { 4364 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4365 PetscErrorCode ierr; 4366 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4367 IS isrowa,iscola; 4368 Mat *aloc; 4369 4370 PetscFunctionBegin; 4371 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4372 if (!row){ 4373 start = A->rmap.rstart; end = A->rmap.rend; 4374 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4375 } else { 4376 isrowa = *row; 4377 } 4378 if (!col){ 4379 start = A->cmap.rstart; 4380 cmap = a->garray; 4381 nzA = a->A->cmap.n; 4382 nzB = a->B->cmap.n; 4383 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4384 ncols = 0; 4385 for (i=0; i<nzB; i++) { 4386 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4387 else break; 4388 } 4389 imark = i; 4390 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4391 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4392 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4393 ierr = PetscFree(idx);CHKERRQ(ierr); 4394 } else { 4395 iscola = *col; 4396 } 4397 if (scall != MAT_INITIAL_MATRIX){ 4398 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4399 aloc[0] = *A_loc; 4400 } 4401 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4402 *A_loc = aloc[0]; 4403 ierr = PetscFree(aloc);CHKERRQ(ierr); 4404 if (!row){ 4405 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4406 } 4407 if (!col){ 4408 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4409 } 4410 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4411 PetscFunctionReturn(0); 4412 } 4413 4414 #undef __FUNCT__ 4415 #define __FUNCT__ "MatGetBrowsOfAcols" 4416 /*@C 4417 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4418 4419 Collective on Mat 4420 4421 Input Parameters: 4422 + A,B - the matrices in mpiaij format 4423 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4424 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4425 4426 Output Parameter: 4427 + rowb, colb - index sets of rows and columns of B to extract 4428 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4429 - B_seq - the sequential matrix generated 4430 4431 Level: developer 4432 4433 @*/ 4434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4435 { 4436 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4437 PetscErrorCode ierr; 4438 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4439 IS isrowb,iscolb; 4440 Mat *bseq; 4441 4442 PetscFunctionBegin; 4443 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4444 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4445 } 4446 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4447 4448 if (scall == MAT_INITIAL_MATRIX){ 4449 start = A->cmap.rstart; 4450 cmap = a->garray; 4451 nzA = a->A->cmap.n; 4452 nzB = a->B->cmap.n; 4453 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4454 ncols = 0; 4455 for (i=0; i<nzB; i++) { /* row < local row index */ 4456 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4457 else break; 4458 } 4459 imark = i; 4460 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4461 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4462 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4463 ierr = PetscFree(idx);CHKERRQ(ierr); 4464 *brstart = imark; 4465 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4466 } else { 4467 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4468 isrowb = *rowb; iscolb = *colb; 4469 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4470 bseq[0] = *B_seq; 4471 } 4472 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4473 *B_seq = bseq[0]; 4474 ierr = PetscFree(bseq);CHKERRQ(ierr); 4475 if (!rowb){ 4476 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4477 } else { 4478 *rowb = isrowb; 4479 } 4480 if (!colb){ 4481 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4482 } else { 4483 *colb = iscolb; 4484 } 4485 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4486 PetscFunctionReturn(0); 4487 } 4488 4489 #undef __FUNCT__ 4490 #define __FUNCT__ "MatGetBrowsOfAoCols" 4491 /*@C 4492 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4493 of the OFF-DIAGONAL portion of local A 4494 4495 Collective on Mat 4496 4497 Input Parameters: 4498 + A,B - the matrices in mpiaij format 4499 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4500 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4501 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4502 4503 Output Parameter: 4504 + B_oth - the sequential matrix generated 4505 4506 Level: developer 4507 4508 @*/ 4509 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4510 { 4511 VecScatter_MPI_General *gen_to,*gen_from; 4512 PetscErrorCode ierr; 4513 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4514 Mat_SeqAIJ *b_oth; 4515 VecScatter ctx=a->Mvctx; 4516 MPI_Comm comm=((PetscObject)ctx)->comm; 4517 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4518 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4519 PetscScalar *rvalues,*svalues; 4520 MatScalar *b_otha,*bufa,*bufA; 4521 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4522 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4523 MPI_Status *sstatus,rstatus; 4524 PetscMPIInt jj; 4525 PetscInt *cols,sbs,rbs; 4526 PetscScalar *vals; 4527 4528 PetscFunctionBegin; 4529 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4530 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4531 } 4532 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4533 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4534 4535 gen_to = (VecScatter_MPI_General*)ctx->todata; 4536 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4537 rvalues = gen_from->values; /* holds the length of receiving row */ 4538 svalues = gen_to->values; /* holds the length of sending row */ 4539 nrecvs = gen_from->n; 4540 nsends = gen_to->n; 4541 4542 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4543 srow = gen_to->indices; /* local row index to be sent */ 4544 sstarts = gen_to->starts; 4545 sprocs = gen_to->procs; 4546 sstatus = gen_to->sstatus; 4547 sbs = gen_to->bs; 4548 rstarts = gen_from->starts; 4549 rprocs = gen_from->procs; 4550 rbs = gen_from->bs; 4551 4552 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4553 if (scall == MAT_INITIAL_MATRIX){ 4554 /* i-array */ 4555 /*---------*/ 4556 /* post receives */ 4557 for (i=0; i<nrecvs; i++){ 4558 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4559 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4560 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4561 } 4562 4563 /* pack the outgoing message */ 4564 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4565 rstartsj = sstartsj + nsends +1; 4566 sstartsj[0] = 0; rstartsj[0] = 0; 4567 len = 0; /* total length of j or a array to be sent */ 4568 k = 0; 4569 for (i=0; i<nsends; i++){ 4570 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4571 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4572 for (j=0; j<nrows; j++) { 4573 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4574 for (l=0; l<sbs; l++){ 4575 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4576 rowlen[j*sbs+l] = ncols; 4577 len += ncols; 4578 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4579 } 4580 k++; 4581 } 4582 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4583 sstartsj[i+1] = len; 
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap.range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse argument; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap.range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
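
/*
   A minimal usage sketch of MatGetBrowsOfAoCols() (not part of the original source;
   the routine name ExampleUseBrowsOfAoCols, the matrix names A and P, and the calling
   context are hypothetical).  It shows the intended calling pattern: a first call with
   MAT_INITIAL_MATRIX builds B_oth and saves the send metadata, and a later call with
   MAT_REUSE_MATRIX refreshes only the numerical values; A's column layout must match
   P's row layout, as checked at the top of the routine above.  Compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleUseBrowsOfAoCols(Mat A,Mat P)
{
  PetscErrorCode ierr;
  PetscInt       *startsj = PETSC_NULL;
  MatScalar      *bufa    = PETSC_NULL;
  Mat            P_oth    = PETSC_NULL;

  PetscFunctionBegin;
  /* gather, as a sequential matrix, the rows of P needed by this process */
  ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,&startsj,&bufa,&P_oth);CHKERRQ(ierr);
  /* ... use P_oth ... */

  /* after P's values change (same nonzero pattern), update P_oth in place */
  ierr = MatGetBrowsOfAoCols(A,P,MAT_REUSE_MATRIX,&startsj,&bufa,&P_oth);CHKERRQ(ierr);

  /* the caller owns the saved arrays and the generated matrix */
  ierr = PetscFree(startsj);CHKERRQ(ierr);
  ierr = PetscFree(bufa);CHKERRQ(ierr);
  ierr = MatDestroy(P_oth);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif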

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameters:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2)
  PetscValidPointer(colmap, 3)
  PetscValidPointer(multScatter, 4)
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )            (          )           (              )
      m (      A       )    *     n (    B     )    =    m (      C       )
        (              )            (          )           (              )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,n = B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
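
/*
   A small usage sketch (not part of the original source; the routine name
   ExampleDenseTimesAIJ and the matrix names are hypothetical).  The
   MatMatMult_MPIDense_MPIAIJ path above is reached through the generic
   MatMatMult() interface when A has type MPIDENSE and B has type MPIAIJ;
   internally the product is formed as (B'*A')' as described in the comment
   before MatMatMultNumeric_MPIDense_MPIAIJ().  Compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleDenseTimesAIJ(Mat A_dense,Mat B_aij)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  /* C is an MPIDENSE matrix with A_dense's local row size and B_aij's local column size */
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif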

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap.bs   = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
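
/*
   A minimal sketch of creating a MATMPIAIJ matrix (not part of the original source;
   the routine name ExampleCreateMPIAIJ, the global size 100, and the preallocation
   counts 5 and 2 are made-up illustration values).  MatCreate_MPIAIJ() above is run
   when the type resolves to "mpiaij", either through MatSetType() as below or through
   -mat_type mpiaij with MatSetFromOptions().  Compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleCreateMPIAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* guess 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif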

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix


.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
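
/*
   A small usage sketch of MatCreateMPIAIJWithSplitArrays() (not part of the original
   source; the routine name ExampleSplitArrays and all numerical values are made up,
   and the sketch assumes exactly 2 MPI processes, each owning 2 rows of a 4x4 matrix).
   The j indices are local to the diagonal block while the oj indices are global column
   numbers, matching how the arrays are passed to MatCreateSeqAIJWithArrays() above.
   Because the arrays are not copied, they are declared static so they outlive the call.
   Compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleSplitArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode     ierr;
  PetscMPIInt        rank;
  static PetscInt    di[] = {0,1,2};     /* diagonal block row pointers (2 local rows)   */
  static PetscInt    dj[] = {0,1};       /* local column indices of the diagonal block   */
  static PetscScalar da[] = {2.0,2.0};   /* diagonal block values                        */
  static PetscInt    oi[] = {0,1,2};     /* off-diagonal block row pointers              */
  static PetscInt    oj[2];              /* GLOBAL column indices of off-diagonal block  */
  static PetscScalar oa[] = {-1.0,-1.0}; /* off-diagonal block values                    */

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* rank 0 couples to global columns 2,3 (owned by rank 1) and vice versa */
  oj[0] = rank ? 0 : 2;  oj[1] = rank ? 1 : 3;
  ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif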

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt   i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt   cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END