1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap.n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap.n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap.n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the 
local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
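  /*
     A sketch of the assumed input layout: v[] is expected to hold the entire local row,
     with the values ordered by increasing global column number, i.e.

        v = [ B entries with global column < diag | all A (diagonal block) entries | remaining B entries ]

     so the three PetscMemcpy() calls below can peel it apart once the split point l
     (number of off-diagonal entries left of the diagonal block) has been located.
  */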
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 311 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 
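/* in[j] is not in the current off-diagonal column map and new nonzeros are allowed,
   so the matrix is converted back to an unassembled state; while disassembled, B is
   addressed with global column indices, hence col = in[j] below */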
375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap.n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 415 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = 
PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt
*rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 642 source = lens + nrecvs; 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree(lens);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. 
If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 675 676 Contributed by: Matthew Knepley 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap.n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultAdd_MPIAIJ" 733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 741 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 749 { 750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 751 PetscErrorCode ierr; 752 PetscTruth merged; 753 754 PetscFunctionBegin; 755 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 756 /* do nondiagonal part */ 757 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 758 if (!merged) { 759 /* send it on its way */ 760 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 761 /* do local part */ 762 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 763 /* receive remote parts: note this assumes the values are not actually */ 764 /* added in yy until the next line, */ 765 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 } else { 767 /* do local part */ 768 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 769 /* send it on its way */ 770 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 771 /* values actually were received in the Begin() but we need to call this nop */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } 774 PetscFunctionReturn(0); 775 } 776 777 EXTERN_C_BEGIN 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 781 { 782 MPI_Comm comm; 783 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 784 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 785 IS Me,Notme; 786 PetscErrorCode ierr; 787 PetscInt M,N,first,last,*notme,i; 788 PetscMPIInt size; 789 790 PetscFunctionBegin; 791 792 /* Easy test: symmetric diagonal block */ 793 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 794 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 795 if (!*f) PetscFunctionReturn(0); 796 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 798 if (size == 1) PetscFunctionReturn(0); 799 800 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 801 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 802 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 803 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 804 for (i=0; i<first; i++) notme[i] = i; 805 for (i=last; i<M; i++) notme[i-last+first] = i; 806 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 807 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 808 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 809 Aoff = Aoffs[0]; 810 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 811 Boff = Boffs[0]; 812 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 813 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 814 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 815 ierr = ISDestroy(Me);CHKERRQ(ierr); 816 ierr = ISDestroy(Notme);CHKERRQ(ierr); 817 818 PetscFunctionReturn(0); 819 } 820 EXTERN_C_END 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 /* do nondiagonal part */ 831 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 832 /* send it on its way */ 833 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 834 /* do local part */ 835 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 836 /* receive remote parts */ 837 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 This only works correctly for square matrices where the subblock A->A is the 843 diagonal block 844 */ 845 #undef __FUNCT__ 846 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 848 { 849 PetscErrorCode ierr; 850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 851 852 PetscFunctionBegin; 853 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 854 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 855 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 856 } 857 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatScale_MPIAIJ" 863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 864 { 865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 870 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatDestroy_MPIAIJ" 876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 877 { 878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 #if defined(PETSC_USE_LOG) 883 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 884 #endif 885 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 886 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 887 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 888 #if defined (PETSC_USE_CTABLE) 889 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 890 #else 891 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 892 #endif 893 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 894 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 895 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 896 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 897 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 898 ierr = PetscFree(aij);CHKERRQ(ierr); 899 900 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatView_MPIAIJ_Binary" 913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 914 { 915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 916 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 917 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 920 int fd; 921 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 922 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 923 PetscScalar *column_values; 924 925 PetscFunctionBegin; 926 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 927 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 928 nz = A->nz + B->nz; 929 if (!rank) { 930 
header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space for what the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] =
B->a[j]; 1015 } 1016 } 1017 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1018 1019 /* store the column values to the file */ 1020 if (!rank) { 1021 MPI_Status status; 1022 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 } else { 1030 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1031 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(column_values);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1040 { 1041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1042 PetscErrorCode ierr; 1043 PetscMPIInt rank = aij->rank,size = aij->size; 1044 PetscTruth isdraw,iascii,isbinary; 1045 PetscViewer sviewer; 1046 PetscViewerFormat format; 1047 1048 PetscFunctionBegin; 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1051 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1052 if (iascii) { 1053 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1054 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1055 MatInfo info; 1056 PetscTruth inodes; 1057 1058 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1059 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1060 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1061 if (!inodes) { 1062 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1063 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1064 } else { 1065 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1066 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1067 } 1068 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1070 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1074 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1077 PetscInt inodecount,inodelimit,*inodes; 1078 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1079 if (inodes) { 1080 ierr = 
PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to participate here to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr =
PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1162 ierr = MatDestroy(A);CHKERRQ(ierr); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatView_MPIAIJ" 1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscErrorCode ierr; 1172 PetscTruth iascii,isdraw,issocket,isbinary; 1173 1174 PetscFunctionBegin; 1175 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1176 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1177 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1178 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1179 if (iascii || isdraw || isbinary || issocket) { 1180 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRelax_MPIAIJ" 1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscErrorCode ierr; 1193 Vec bb1; 1194 1195 PetscFunctionBegin; 1196 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1197 1198 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1199 if (flag & SOR_ZERO_INITIAL_GUESS) { 1200 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1201 its--; 1202 } 1203 1204 while (its--) { 1205 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 1208 /* update rhs: bb1 = bb - B*x */ 1209 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1211 1212 /* local sweep */ 1213 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1214 } 1215 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1216 if (flag & SOR_ZERO_INITIAL_GUESS) { 1217 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1218 its--; 1219 } 1220 while (its--) { 1221 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 1224 /* update rhs: bb1 = bb - B*x */ 1225 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1227 1228 /* local sweep */ 1229 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1230 } 1231 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1232 if (flag & SOR_ZERO_INITIAL_GUESS) { 1233 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1234 its--; 1235 } 1236 while (its--) { 1237 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* update rhs: bb1 = bb - B*x */ 1241 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1242 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1243 1244 /* local 
sweep */ 1245 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1246 } 1247 } else { 1248 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1249 } 1250 1251 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatPermute_MPIAIJ" 1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1258 { 1259 MPI_Comm comm,pcomm; 1260 PetscInt first,local_size,nrows,*rows; 1261 int ntids; 1262 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1267 /* make a collective version of 'rowp' */ 1268 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1269 if (pcomm==comm) { 1270 crowp = rowp; 1271 } else { 1272 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1273 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1274 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1275 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1276 } 1277 /* collect the global row permutation and invert it */ 1278 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1279 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1280 if (pcomm!=comm) { 1281 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1282 } 1283 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1284 /* get the local target indices */ 1285 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1286 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1287 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1288 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1289 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1290 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1291 /* the column permutation is so much easier; 1292 make a local version of 'colp' and invert it */ 1293 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1294 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1295 if (ntids==1) { 1296 lcolp = colp; 1297 } else { 1298 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1299 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1300 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1301 } 1302 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1303 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1304 if (ntids>1) { 1305 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1306 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1307 } 1308 /* now we just get the submatrix */ 1309 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1310 /* clean up */ 1311 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1312 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1319 { 1320 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1321 Mat A = mat->A,B = mat->B; 1322 PetscErrorCode ierr; 1323 PetscReal isend[5],irecv[5]; 1324 1325 PetscFunctionBegin; 1326 info->block_size = 1.0; 1327 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1328 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1329 isend[3] = info->memory; isend[4] = info->mallocs; 1330 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1331 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += 
info->nz_unneeded; 1332 isend[3] += info->memory; isend[4] += info->mallocs; 1333 if (flag == MAT_LOCAL) { 1334 info->nz_used = isend[0]; 1335 info->nz_allocated = isend[1]; 1336 info->nz_unneeded = isend[2]; 1337 info->memory = isend[3]; 1338 info->mallocs = isend[4]; 1339 } else if (flag == MAT_GLOBAL_MAX) { 1340 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1341 info->nz_used = irecv[0]; 1342 info->nz_allocated = irecv[1]; 1343 info->nz_unneeded = irecv[2]; 1344 info->memory = irecv[3]; 1345 info->mallocs = irecv[4]; 1346 } else if (flag == MAT_GLOBAL_SUM) { 1347 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1348 info->nz_used = irecv[0]; 1349 info->nz_allocated = irecv[1]; 1350 info->nz_unneeded = irecv[2]; 1351 info->memory = irecv[3]; 1352 info->mallocs = irecv[4]; 1353 } 1354 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1355 info->fill_ratio_needed = 0; 1356 info->factor_mallocs = 0; 1357 info->rows_global = (double)matin->rmap.N; 1358 info->columns_global = (double)matin->cmap.N; 1359 info->rows_local = (double)matin->rmap.n; 1360 info->columns_local = (double)matin->cmap.N; 1361 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatSetOption_MPIAIJ" 1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1368 { 1369 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 switch (op) { 1374 case MAT_NEW_NONZERO_LOCATIONS: 1375 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1376 case MAT_KEEP_ZEROED_ROWS: 1377 case MAT_NEW_NONZERO_LOCATION_ERR: 1378 case MAT_USE_INODES: 1379 case MAT_IGNORE_ZERO_ENTRIES: 1380 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1381 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1382 break; 1383 case MAT_ROW_ORIENTED: 1384 a->roworiented = flg; 1385 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1386 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1387 break; 1388 case MAT_NEW_DIAGONALS: 1389 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1390 break; 1391 case MAT_IGNORE_OFF_PROC_ENTRIES: 1392 a->donotstash = PETSC_TRUE; 1393 break; 1394 case MAT_SYMMETRIC: 1395 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1396 break; 1397 case MAT_STRUCTURALLY_SYMMETRIC: 1398 case MAT_HERMITIAN: 1399 case MAT_SYMMETRY_ETERNAL: 1400 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1401 break; 1402 default: 1403 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetRow_MPIAIJ" 1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1411 { 1412 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1413 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1414 PetscErrorCode ierr; 1415 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1416 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1417 PetscInt *cmap,*idx_p; 1418 1419 PetscFunctionBegin; 1420 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1421 mat->getrowactive = PETSC_TRUE; 1422 1423 if (!mat->rowvalues && (idx || v)) { 1424 /* 1425 allocate enough space to hold information from the longest row. 
1426 */ 1427 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1428 PetscInt max = 1,tmp; 1429 for (i=0; i<matin->rmap.n; i++) { 1430 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1431 if (max < tmp) { max = tmp; } 1432 } 1433 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1434 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1435 } 1436 1437 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1438 lrow = row - rstart; 1439 1440 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1441 if (!v) {pvA = 0; pvB = 0;} 1442 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1443 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1444 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1445 nztot = nzA + nzB; 1446 1447 cmap = mat->garray; 1448 if (v || idx) { 1449 if (nztot) { 1450 /* Sort by increasing column numbers, assuming A and B already sorted */ 1451 PetscInt imark = -1; 1452 if (v) { 1453 *v = v_p = mat->rowvalues; 1454 for (i=0; i<nzB; i++) { 1455 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1456 else break; 1457 } 1458 imark = i; 1459 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1460 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1461 } 1462 if (idx) { 1463 *idx = idx_p = mat->rowindices; 1464 if (imark > -1) { 1465 for (i=0; i<imark; i++) { 1466 idx_p[i] = cmap[cworkB[i]]; 1467 } 1468 } else { 1469 for (i=0; i<nzB; i++) { 1470 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1471 else break; 1472 } 1473 imark = i; 1474 } 1475 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1476 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1477 } 1478 } else { 1479 if (idx) *idx = 0; 1480 if (v) *v = 0; 1481 } 1482 } 1483 *nz = nztot; 1484 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1485 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1492 { 1493 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1494 1495 PetscFunctionBegin; 1496 if (!aij->getrowactive) { 1497 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1498 } 1499 aij->getrowactive = PETSC_FALSE; 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatNorm_MPIAIJ" 1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1506 { 1507 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1508 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1509 PetscErrorCode ierr; 1510 PetscInt i,j,cstart = mat->cmap.rstart; 1511 PetscReal sum = 0.0; 1512 MatScalar *v; 1513 1514 PetscFunctionBegin; 1515 if (aij->size == 1) { 1516 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1517 } else { 1518 if (type == NORM_FROBENIUS) { 1519 v = amat->a; 1520 for (i=0; i<amat->nz; i++) { 1521 #if defined(PETSC_USE_COMPLEX) 1522 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1523 #else 1524 sum += (*v)*(*v); v++; 1525 #endif 1526 } 1527 v = bmat->a; 1528 for (i=0; i<bmat->nz; i++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1531 #else 1532 sum += (*v)*(*v); v++; 1533 #endif 1534 } 1535 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1536 *norm = 
sqrt(*norm); 1537 } else if (type == NORM_1) { /* max column norm */ 1538 PetscReal *tmp,*tmp2; 1539 PetscInt *jj,*garray = aij->garray; 1540 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1541 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1542 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1543 *norm = 0.0; 1544 v = amat->a; jj = amat->j; 1545 for (j=0; j<amat->nz; j++) { 1546 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1547 } 1548 v = bmat->a; jj = bmat->j; 1549 for (j=0; j<bmat->nz; j++) { 1550 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1551 } 1552 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1553 for (j=0; j<mat->cmap.N; j++) { 1554 if (tmp2[j] > *norm) *norm = tmp2[j]; 1555 } 1556 ierr = PetscFree(tmp);CHKERRQ(ierr); 1557 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1558 } else if (type == NORM_INFINITY) { /* max row norm */ 1559 PetscReal ntemp = 0.0; 1560 for (j=0; j<aij->A->rmap.n; j++) { 1561 v = amat->a + amat->i[j]; 1562 sum = 0.0; 1563 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1564 sum += PetscAbsScalar(*v); v++; 1565 } 1566 v = bmat->a + bmat->i[j]; 1567 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1568 sum += PetscAbsScalar(*v); v++; 1569 } 1570 if (sum > ntemp) ntemp = sum; 1571 } 1572 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1573 } else { 1574 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_MPIAIJ" 1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1583 { 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1585 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1586 PetscErrorCode ierr; 1587 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1588 PetscInt cstart=A->cmap.rstart,ncol; 1589 Mat B; 1590 MatScalar *array; 1591 1592 PetscFunctionBegin; 1593 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 1595 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1596 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1597 ai = Aloc->i; aj = Aloc->j; 1598 bi = Bloc->i; bj = Bloc->j; 1599 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1600 cols = d_nnz + na + 1; /* work space to be used by B part */ 1601 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1602 for (i=0; i<ai[ma]; i++){ 1603 d_nnz[aj[i]] ++; 1604 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1605 } 1606 1607 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1608 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1609 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1610 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1611 1612 /* copy over the A part */ 1613 array = Aloc->a; 1614 row = A->rmap.rstart; 1615 for (i=0; i<ma; i++) { 1616 ncol = ai[i+1]-ai[i]; 1617 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1618 row++; array += ncol; aj += ncol; 1619 } 1620 aj = Aloc->j; 1621 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1622 1623 /* copy over the B part */ 1624 array = Bloc->a; 1625 row = A->rmap.rstart; 1626 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1627 for (i=0; i<mb; i++) { 1628 ncol = bi[i+1]-bi[i]; 1629 
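    /* the ncol entries of global row 'row' of the off-diagonal block become column 'row' of the transpose B */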
ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1630 row++; array += ncol; cols += ncol; 1631 } 1632 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1633 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1634 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1635 if (matout) { 1636 *matout = B; 1637 } else { 1638 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1639 } 1640 PetscFunctionReturn(0); 1641 } 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1645 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1646 { 1647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1648 Mat a = aij->A,b = aij->B; 1649 PetscErrorCode ierr; 1650 PetscInt s1,s2,s3; 1651 1652 PetscFunctionBegin; 1653 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1654 if (rr) { 1655 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1656 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1657 /* Overlap communication with computation. */ 1658 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 } 1660 if (ll) { 1661 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1662 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1663 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1664 } 1665 /* scale the diagonal block */ 1666 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1667 1668 if (rr) { 1669 /* Do a scatter end and then right scale the off-diagonal block */ 1670 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1671 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1672 } 1673 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1679 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1680 { 1681 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1682 PetscErrorCode ierr; 1683 1684 PetscFunctionBegin; 1685 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1686 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1691 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1692 { 1693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatEqual_MPIAIJ" 1703 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1704 { 1705 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1706 Mat a,b,c,d; 1707 PetscTruth flg; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 a = matA->A; b = matA->B; 1712 c = matB->A; d = matB->B; 1713 1714 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1715 if (flg) { 1716 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1717 } 1718 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatCopy_MPIAIJ" 1724 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1725 { 1726 PetscErrorCode ierr; 1727 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1728 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1729 1730 PetscFunctionBegin; 1731 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1732 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1733 /* because of the column compression in the off-processor part of the matrix a->B, 1734 the number of columns in a->B and b->B may be different, hence we cannot call 1735 the MatCopy() directly on the two parts. If need be, we can provide a more 1736 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1737 then copying the submatrices */ 1738 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1739 } else { 1740 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1741 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1742 } 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1748 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1749 { 1750 PetscErrorCode ierr; 1751 1752 PetscFunctionBegin; 1753 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #include "petscblaslapack.h" 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatAXPY_MPIAIJ" 1760 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1761 { 1762 PetscErrorCode ierr; 1763 PetscInt i; 1764 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1765 PetscBLASInt bnz,one=1; 1766 Mat_SeqAIJ *x,*y; 1767 1768 PetscFunctionBegin; 1769 if (str == SAME_NONZERO_PATTERN) { 1770 PetscScalar alpha = a; 1771 x = (Mat_SeqAIJ *)xx->A->data; 1772 y = (Mat_SeqAIJ *)yy->A->data; 1773 bnz = PetscBLASIntCast(x->nz); 1774 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1775 x = (Mat_SeqAIJ *)xx->B->data; 1776 y = (Mat_SeqAIJ *)yy->B->data; 1777 bnz = PetscBLASIntCast(x->nz); 1778 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1779 } else if (str == SUBSET_NONZERO_PATTERN) { 1780 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1781 1782 x = (Mat_SeqAIJ *)xx->B->data; 1783 y = (Mat_SeqAIJ *)yy->B->data; 1784 if (y->xtoy && y->XtoY != xx->B) { 1785 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1786 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1787 } 1788 if (!y->xtoy) { /* get xtoy */ 1789 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1790 y->XtoY = xx->B; 1791 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1792 } 1793 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1794 } else { 1795 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1796 } 1797 PetscFunctionReturn(0); 1798 } 1799 1800 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatConjugate_MPIAIJ" 1804 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1805 { 1806 #if defined(PETSC_USE_COMPLEX) 1807 PetscErrorCode ierr; 1808 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1809 1810 PetscFunctionBegin; 1811 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1812 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1813 #else 1814 PetscFunctionBegin; 1815 #endif 1816 PetscFunctionReturn(0); 1817 } 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatRealPart_MPIAIJ" 1821 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1822 { 1823 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1824 PetscErrorCode ierr; 1825 1826 PetscFunctionBegin; 1827 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1828 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1829 PetscFunctionReturn(0); 1830 } 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1834 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1835 { 1836 Mat_MPIAIJ 
*a = (Mat_MPIAIJ*)A->data; 1837 PetscErrorCode ierr; 1838 1839 PetscFunctionBegin; 1840 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1841 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #ifdef PETSC_HAVE_PBGL 1846 1847 #include <boost/parallel/mpi/bsp_process_group.hpp> 1848 #include <boost/graph/distributed/ilu_default_graph.hpp> 1849 #include <boost/graph/distributed/ilu_0_block.hpp> 1850 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1851 #include <boost/graph/distributed/petsc/interface.hpp> 1852 #include <boost/multi_array.hpp> 1853 #include <boost/parallel/distributed_property_map.hpp> 1854 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1857 /* 1858 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1859 */ 1860 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1861 { 1862 namespace petsc = boost::distributed::petsc; 1863 1864 namespace graph_dist = boost::graph::distributed; 1865 using boost::graph::distributed::ilu_default::process_group_type; 1866 using boost::graph::ilu_permuted; 1867 1868 PetscTruth row_identity, col_identity; 1869 PetscContainer c; 1870 PetscInt m, n, M, N; 1871 PetscErrorCode ierr; 1872 1873 PetscFunctionBegin; 1874 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1875 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1876 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1877 if (!row_identity || !col_identity) { 1878 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1879 } 1880 1881 process_group_type pg; 1882 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1883 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1884 lgraph_type& level_graph = *lgraph_p; 1885 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1886 1887 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1888 ilu_permuted(level_graph); 1889 1890 /* put together the new matrix */ 1891 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1892 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1893 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1894 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1895 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1896 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1897 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1898 (*fact)->factor = FACTOR_LU; 1899 1900 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1901 ierr = PetscContainerSetPointer(c, lgraph_p); 1902 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1908 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1909 { 1910 PetscFunctionBegin; 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatSolve_MPIAIJ" 1916 /* 1917 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1918 */ 1919 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1920 { 1921 namespace graph_dist = boost::graph::distributed; 1922 1923 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1924 lgraph_type* lgraph_p; 1925 PetscContainer c; 1926 PetscErrorCode 
ierr; 1927 1928 PetscFunctionBegin; 1929 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1930 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1931 ierr = VecCopy(b, x); CHKERRQ(ierr); 1932 1933 PetscScalar* array_x; 1934 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1935 PetscInt sx; 1936 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1937 1938 PetscScalar* array_b; 1939 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1940 PetscInt sb; 1941 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1942 1943 lgraph_type& level_graph = *lgraph_p; 1944 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1945 1946 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1947 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1948 ref_x(array_x, boost::extents[num_vertices(graph)]); 1949 1950 typedef boost::iterator_property_map<array_ref_type::iterator, 1951 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1952 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1953 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1954 1955 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1956 1957 PetscFunctionReturn(0); 1958 } 1959 #endif 1960 1961 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1962 PetscInt nzlocal,nsends,nrecvs; 1963 PetscMPIInt *send_rank; 1964 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1965 PetscScalar *sbuf_a,**rbuf_a; 1966 PetscErrorCode (*MatDestroy)(Mat); 1967 } Mat_Redundant; 1968 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1971 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1972 { 1973 PetscErrorCode ierr; 1974 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1975 PetscInt i; 1976 1977 PetscFunctionBegin; 1978 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1979 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1980 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1981 for (i=0; i<redund->nrecvs; i++){ 1982 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1983 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1984 } 1985 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1986 ierr = PetscFree(redund);CHKERRQ(ierr); 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatDestroy_MatRedundant" 1992 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1993 { 1994 PetscErrorCode ierr; 1995 PetscContainer container; 1996 Mat_Redundant *redund=PETSC_NULL; 1997 1998 PetscFunctionBegin; 1999 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2000 if (container) { 2001 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2002 } else { 2003 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2004 } 2005 A->ops->destroy = redund->MatDestroy; 2006 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2007 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2008 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2014 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2015 { 2016 PetscMPIInt rank,size; 2017 MPI_Comm comm=((PetscObject)mat)->comm; 2018 PetscErrorCode ierr; 2019 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2020 PetscMPIInt 
*send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2021 PetscInt *rowrange=mat->rmap.range; 2022 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2023 Mat A=aij->A,B=aij->B,C=*matredundant; 2024 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2025 PetscScalar *sbuf_a; 2026 PetscInt nzlocal=a->nz+b->nz; 2027 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2028 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 2029 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2030 MatScalar *aworkA,*aworkB; 2031 PetscScalar *vals; 2032 PetscMPIInt tag1,tag2,tag3,imdex; 2033 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2034 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2035 MPI_Status recv_status,*send_status; 2036 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2037 PetscInt **rbuf_j=PETSC_NULL; 2038 PetscScalar **rbuf_a=PETSC_NULL; 2039 Mat_Redundant *redund=PETSC_NULL; 2040 PetscContainer container; 2041 2042 PetscFunctionBegin; 2043 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2044 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2045 2046 if (reuse == MAT_REUSE_MATRIX) { 2047 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2048 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2049 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2050 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2051 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2052 if (container) { 2053 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2054 } else { 2055 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2056 } 2057 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2058 2059 nsends = redund->nsends; 2060 nrecvs = redund->nrecvs; 2061 send_rank = redund->send_rank; recv_rank = send_rank + size; 2062 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2063 sbuf_j = redund->sbuf_j; 2064 sbuf_a = redund->sbuf_a; 2065 rbuf_j = redund->rbuf_j; 2066 rbuf_a = redund->rbuf_a; 2067 } 2068 2069 if (reuse == MAT_INITIAL_MATRIX){ 2070 PetscMPIInt subrank,subsize; 2071 PetscInt nleftover,np_subcomm; 2072 /* get the destination processors' id send_rank, nsends and nrecvs */ 2073 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2074 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2075 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2076 recv_rank = send_rank + size; 2077 np_subcomm = size/nsubcomm; 2078 nleftover = size - nsubcomm*np_subcomm; 2079 nsends = 0; nrecvs = 0; 2080 for (i=0; i<size; i++){ /* i=rank*/ 2081 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2082 send_rank[nsends] = i; nsends++; 2083 recv_rank[nrecvs++] = i; 2084 } 2085 } 2086 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2087 i = size-nleftover-1; 2088 j = 0; 2089 while (j < nsubcomm - nleftover){ 2090 send_rank[nsends++] = i; 2091 i--; j++; 2092 } 2093 } 2094 2095 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2096 for (i=0; i<nleftover; i++){ 2097 recv_rank[nrecvs++] = size-nleftover+i; 2098 } 2099 } 2100 2101 /* allocate sbuf_j, sbuf_a */ 2102 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2103 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2104 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2105 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2106 2107 /* copy mat's local entries into the buffers */ 2108 if (reuse == MAT_INITIAL_MATRIX){ 2109 rownz_max = 0; 2110 rptr = sbuf_j; 2111 cols = sbuf_j + rend-rstart + 1; 2112 vals = sbuf_a; 2113 rptr[0] = 0; 2114 for (i=0; i<rend-rstart; i++){ 2115 row = i + rstart; 2116 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2117 ncols = nzA + nzB; 2118 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2119 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2120 /* load the column indices for this row into cols */ 2121 lwrite = 0; 2122 for (l=0; l<nzB; l++) { 2123 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2124 vals[lwrite] = aworkB[l]; 2125 cols[lwrite++] = ctmp; 2126 } 2127 } 2128 for (l=0; l<nzA; l++){ 2129 vals[lwrite] = aworkA[l]; 2130 cols[lwrite++] = cstart + cworkA[l]; 2131 } 2132 for (l=0; l<nzB; l++) { 2133 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2134 vals[lwrite] = aworkB[l]; 2135 cols[lwrite++] = ctmp; 2136 } 2137 } 2138 vals += ncols; 2139 cols += ncols; 2140 rptr[i+1] = rptr[i] + ncols; 2141 if (rownz_max < ncols) rownz_max = ncols; 2142 } 2143 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2144 } else { /* only copy matrix values into sbuf_a */ 2145 rptr = sbuf_j; 2146 vals = sbuf_a; 2147 rptr[0] = 0; 2148 for (i=0; i<rend-rstart; i++){ 2149 row = i + rstart; 2150 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2151 ncols = nzA + nzB; 2152 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2153 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2154 lwrite = 0; 2155 for (l=0; l<nzB; l++) { 2156 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2157 } 2158 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2159 for (l=0; l<nzB; l++) { 
2160 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2161 } 2162 vals += ncols; 2163 rptr[i+1] = rptr[i] + ncols; 2164 } 2165 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2166 2167 /* send nzlocal to others, and recv other's nzlocal */ 2168 /*--------------------------------------------------*/ 2169 if (reuse == MAT_INITIAL_MATRIX){ 2170 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2171 s_waits2 = s_waits3 + nsends; 2172 s_waits1 = s_waits2 + nsends; 2173 r_waits1 = s_waits1 + nsends; 2174 r_waits2 = r_waits1 + nrecvs; 2175 r_waits3 = r_waits2 + nrecvs; 2176 } else { 2177 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2178 r_waits3 = s_waits3 + nsends; 2179 } 2180 2181 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2182 if (reuse == MAT_INITIAL_MATRIX){ 2183 /* get new tags to keep the communication clean */ 2184 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2185 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2186 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2187 rbuf_nz = sbuf_nz + nsends; 2188 2189 /* post receives of other's nzlocal */ 2190 for (i=0; i<nrecvs; i++){ 2191 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2192 } 2193 /* send nzlocal to others */ 2194 for (i=0; i<nsends; i++){ 2195 sbuf_nz[i] = nzlocal; 2196 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2197 } 2198 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2199 count = nrecvs; 2200 while (count) { 2201 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2202 recv_rank[imdex] = recv_status.MPI_SOURCE; 2203 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2204 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2205 2206 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2207 rbuf_nz[imdex] += i + 2; 2208 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2209 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2210 count--; 2211 } 2212 /* wait on sends of nzlocal */ 2213 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2214 /* send mat->i,j to others, and recv from other's */ 2215 /*------------------------------------------------*/ 2216 for (i=0; i<nsends; i++){ 2217 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2218 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2219 } 2220 /* wait on receives of mat->i,j */ 2221 /*------------------------------*/ 2222 count = nrecvs; 2223 while (count) { 2224 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2225 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2226 count--; 2227 } 2228 /* wait on sends of mat->i,j */ 2229 /*---------------------------*/ 2230 if (nsends) { 2231 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2232 } 2233 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2234 2235 /* post receives, send and receive mat->a */ 2236 /*----------------------------------------*/ 
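  /* the numerical values are exchanged on every call; the index exchange above is done only for
     MAT_INITIAL_MATRIX, since on the reuse path the nonzero pattern is unchanged */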
2237 for (imdex=0; imdex<nrecvs; imdex++) { 2238 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2239 } 2240 for (i=0; i<nsends; i++){ 2241 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2242 } 2243 count = nrecvs; 2244 while (count) { 2245 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2246 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2247 count--; 2248 } 2249 if (nsends) { 2250 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2251 } 2252 2253 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2254 2255 /* create redundant matrix */ 2256 /*-------------------------*/ 2257 if (reuse == MAT_INITIAL_MATRIX){ 2258 /* compute rownz_max for preallocation */ 2259 for (imdex=0; imdex<nrecvs; imdex++){ 2260 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2261 rptr = rbuf_j[imdex]; 2262 for (i=0; i<j; i++){ 2263 ncols = rptr[i+1] - rptr[i]; 2264 if (rownz_max < ncols) rownz_max = ncols; 2265 } 2266 } 2267 2268 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2269 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2270 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2271 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2272 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2273 } else { 2274 C = *matredundant; 2275 } 2276 2277 /* insert local matrix entries */ 2278 rptr = sbuf_j; 2279 cols = sbuf_j + rend-rstart + 1; 2280 vals = sbuf_a; 2281 for (i=0; i<rend-rstart; i++){ 2282 row = i + rstart; 2283 ncols = rptr[i+1] - rptr[i]; 2284 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2285 vals += ncols; 2286 cols += ncols; 2287 } 2288 /* insert received matrix entries */ 2289 for (imdex=0; imdex<nrecvs; imdex++){ 2290 rstart = rowrange[recv_rank[imdex]]; 2291 rend = rowrange[recv_rank[imdex]+1]; 2292 rptr = rbuf_j[imdex]; 2293 cols = rbuf_j[imdex] + rend-rstart + 1; 2294 vals = rbuf_a[imdex]; 2295 for (i=0; i<rend-rstart; i++){ 2296 row = i + rstart; 2297 ncols = rptr[i+1] - rptr[i]; 2298 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2299 vals += ncols; 2300 cols += ncols; 2301 } 2302 } 2303 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2304 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2306 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2307 if (reuse == MAT_INITIAL_MATRIX){ 2308 PetscContainer container; 2309 *matredundant = C; 2310 /* create a supporting struct and attach it to C for reuse */ 2311 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2312 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2313 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2314 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2315 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2316 2317 redund->nzlocal = nzlocal; 2318 redund->nsends = nsends; 2319 redund->nrecvs = nrecvs; 2320 redund->send_rank = send_rank; 2321 redund->sbuf_nz = sbuf_nz; 2322 redund->sbuf_j = sbuf_j; 2323 redund->sbuf_a = sbuf_a; 2324 redund->rbuf_j = 
rbuf_j; 2325 redund->rbuf_a = rbuf_a; 2326 2327 redund->MatDestroy = C->ops->destroy; 2328 C->ops->destroy = MatDestroy_MatRedundant; 2329 } 2330 PetscFunctionReturn(0); 2331 } 2332 2333 #undef __FUNCT__ 2334 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2335 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2336 { 2337 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2338 PetscInt n = A->rmap.n; 2339 PetscInt cstart = A->cmap.rstart; 2340 PetscInt *cmap = mat->garray; 2341 PetscInt *diagIdx, *offdiagIdx; 2342 Vec diagV, offdiagV; 2343 PetscScalar *a, *diagA, *offdiagA; 2344 PetscInt r; 2345 PetscErrorCode ierr; 2346 2347 PetscFunctionBegin; 2348 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2349 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2350 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2351 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2352 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2353 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2354 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2355 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2356 for(r = 0; r < n; ++r) { 2357 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2358 a[r] = diagA[r]; 2359 idx[r] = cstart + diagIdx[r]; 2360 } else { 2361 a[r] = offdiagA[r]; 2362 idx[r] = cmap[offdiagIdx[r]]; 2363 } 2364 } 2365 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2366 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2367 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2368 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2369 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2370 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2376 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2377 { 2378 PetscErrorCode ierr; 2379 2380 PetscFunctionBegin; 2381 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2382 PetscFunctionReturn(0); 2383 } 2384 2385 /* -------------------------------------------------------------------*/ 2386 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2387 MatGetRow_MPIAIJ, 2388 MatRestoreRow_MPIAIJ, 2389 MatMult_MPIAIJ, 2390 /* 4*/ MatMultAdd_MPIAIJ, 2391 MatMultTranspose_MPIAIJ, 2392 MatMultTransposeAdd_MPIAIJ, 2393 #ifdef PETSC_HAVE_PBGL 2394 MatSolve_MPIAIJ, 2395 #else 2396 0, 2397 #endif 2398 0, 2399 0, 2400 /*10*/ 0, 2401 0, 2402 0, 2403 MatRelax_MPIAIJ, 2404 MatTranspose_MPIAIJ, 2405 /*15*/ MatGetInfo_MPIAIJ, 2406 MatEqual_MPIAIJ, 2407 MatGetDiagonal_MPIAIJ, 2408 MatDiagonalScale_MPIAIJ, 2409 MatNorm_MPIAIJ, 2410 /*20*/ MatAssemblyBegin_MPIAIJ, 2411 MatAssemblyEnd_MPIAIJ, 2412 0, 2413 MatSetOption_MPIAIJ, 2414 MatZeroEntries_MPIAIJ, 2415 /*25*/ MatZeroRows_MPIAIJ, 2416 0, 2417 #ifdef PETSC_HAVE_PBGL 2418 MatLUFactorNumeric_MPIAIJ, 2419 #else 2420 0, 2421 #endif 2422 0, 2423 0, 2424 /*30*/ MatSetUpPreallocation_MPIAIJ, 2425 #ifdef PETSC_HAVE_PBGL 2426 MatILUFactorSymbolic_MPIAIJ, 2427 #else 2428 0, 2429 #endif 2430 0, 2431 0, 2432 0, 2433 /*35*/ MatDuplicate_MPIAIJ, 2434 0, 2435 0, 2436 0, 2437 0, 2438 /*40*/ MatAXPY_MPIAIJ, 2439 MatGetSubMatrices_MPIAIJ, 2440 MatIncreaseOverlap_MPIAIJ, 2441 MatGetValues_MPIAIJ, 2442 MatCopy_MPIAIJ, 2443 /*45*/ 0, 2444 MatScale_MPIAIJ, 2445 0, 2446 0, 2447 0, 2448 /*50*/ MatSetBlockSize_MPIAIJ, 2449 0, 2450 0, 2451 0, 2452 0, 2453 /*55*/ 
MatFDColoringCreate_MPIAIJ, 2454 0, 2455 MatSetUnfactored_MPIAIJ, 2456 MatPermute_MPIAIJ, 2457 0, 2458 /*60*/ MatGetSubMatrix_MPIAIJ, 2459 MatDestroy_MPIAIJ, 2460 MatView_MPIAIJ, 2461 0, 2462 0, 2463 /*65*/ 0, 2464 0, 2465 0, 2466 0, 2467 0, 2468 /*70*/ 0, 2469 0, 2470 MatSetColoring_MPIAIJ, 2471 #if defined(PETSC_HAVE_ADIC) 2472 MatSetValuesAdic_MPIAIJ, 2473 #else 2474 0, 2475 #endif 2476 MatSetValuesAdifor_MPIAIJ, 2477 /*75*/ 0, 2478 0, 2479 0, 2480 0, 2481 0, 2482 /*80*/ 0, 2483 0, 2484 0, 2485 0, 2486 /*84*/ MatLoad_MPIAIJ, 2487 0, 2488 0, 2489 0, 2490 0, 2491 0, 2492 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2493 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2494 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2495 MatPtAP_Basic, 2496 MatPtAPSymbolic_MPIAIJ, 2497 /*95*/ MatPtAPNumeric_MPIAIJ, 2498 0, 2499 0, 2500 0, 2501 0, 2502 /*100*/0, 2503 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2504 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2505 MatConjugate_MPIAIJ, 2506 0, 2507 /*105*/MatSetValuesRow_MPIAIJ, 2508 MatRealPart_MPIAIJ, 2509 MatImaginaryPart_MPIAIJ, 2510 0, 2511 0, 2512 /*110*/0, 2513 MatGetRedundantMatrix_MPIAIJ, 2514 MatGetRowMin_MPIAIJ, 2515 0, 2516 0, 2517 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2518 2519 /* ----------------------------------------------------------------------------------------*/ 2520 2521 EXTERN_C_BEGIN 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2524 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2525 { 2526 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2531 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 EXTERN_C_END 2535 2536 EXTERN_C_BEGIN 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2539 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2540 { 2541 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2542 PetscErrorCode ierr; 2543 2544 PetscFunctionBegin; 2545 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2546 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 EXTERN_C_END 2550 2551 #include "petscpc.h" 2552 EXTERN_C_BEGIN 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2555 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2556 { 2557 Mat_MPIAIJ *b; 2558 PetscErrorCode ierr; 2559 PetscInt i; 2560 2561 PetscFunctionBegin; 2562 B->preallocated = PETSC_TRUE; 2563 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2564 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2565 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2566 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2567 2568 B->rmap.bs = B->cmap.bs = 1; 2569 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2570 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2571 if (d_nnz) { 2572 for (i=0; i<B->rmap.n; i++) { 2573 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2574 } 2575 } 2576 if (o_nnz) { 2577 for (i=0; i<B->rmap.n; i++) { 2578 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2579 } 2580 } 2581 b = (Mat_MPIAIJ*)B->data; 2582 2583 /* Explicitly create 2 MATSEQAIJ matrices. 
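     b->A holds the diagonal block (local rows by local columns); b->B holds the off-diagonal block
     and is created with the full global number of columns, whose unused columns are compressed out
     later during assembly (see the column-compression note in MatCopy_MPIAIJ above).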
*/ 2584 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2585 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2586 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2587 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2588 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2589 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2590 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2591 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2592 2593 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2594 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2595 2596 PetscFunctionReturn(0); 2597 } 2598 EXTERN_C_END 2599 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2602 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2603 { 2604 Mat mat; 2605 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2606 PetscErrorCode ierr; 2607 2608 PetscFunctionBegin; 2609 *newmat = 0; 2610 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2611 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2612 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2613 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2614 a = (Mat_MPIAIJ*)mat->data; 2615 2616 mat->factor = matin->factor; 2617 mat->rmap.bs = matin->rmap.bs; 2618 mat->assembled = PETSC_TRUE; 2619 mat->insertmode = NOT_SET_VALUES; 2620 mat->preallocated = PETSC_TRUE; 2621 2622 a->size = oldmat->size; 2623 a->rank = oldmat->rank; 2624 a->donotstash = oldmat->donotstash; 2625 a->roworiented = oldmat->roworiented; 2626 a->rowindices = 0; 2627 a->rowvalues = 0; 2628 a->getrowactive = PETSC_FALSE; 2629 2630 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2631 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2632 2633 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2634 if (oldmat->colmap) { 2635 #if defined (PETSC_USE_CTABLE) 2636 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2637 #else 2638 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2639 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2640 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2641 #endif 2642 } else a->colmap = 0; 2643 if (oldmat->garray) { 2644 PetscInt len; 2645 len = oldmat->B->cmap.n; 2646 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2647 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2648 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2649 } else a->garray = 0; 2650 2651 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2652 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2653 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2654 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2655 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2656 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2657 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2658 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2659 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2660 *newmat = mat; 2661 PetscFunctionReturn(0); 2662 } 
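
/*
   Illustrative usage sketch (not part of the library source): a caller would
   normally reach the loader below through the public MatLoad() interface with
   a binary viewer.  The file name "matrix.dat" is a placeholder.

      PetscViewer viewer;
      Mat         A;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/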
2663 2664 #include "petscsys.h" 2665 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "MatLoad_MPIAIJ" 2668 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2669 { 2670 Mat A; 2671 PetscScalar *vals,*svals; 2672 MPI_Comm comm = ((PetscObject)viewer)->comm; 2673 MPI_Status status; 2674 PetscErrorCode ierr; 2675 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2676 PetscInt i,nz,j,rstart,rend,mmax; 2677 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2678 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2679 PetscInt cend,cstart,n,*rowners; 2680 int fd; 2681 2682 PetscFunctionBegin; 2683 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2684 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2685 if (!rank) { 2686 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2687 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2688 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2689 } 2690 2691 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2692 M = header[1]; N = header[2]; 2693 /* determine ownership of all rows */ 2694 m = M/size + ((M % size) > rank); 2695 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2696 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2697 2698 /* First process needs enough room for process with most rows */ 2699 if (!rank) { 2700 mmax = rowners[1]; 2701 for (i=2; i<size; i++) { 2702 mmax = PetscMax(mmax,rowners[i]); 2703 } 2704 } else mmax = m; 2705 2706 rowners[0] = 0; 2707 for (i=2; i<=size; i++) { 2708 rowners[i] += rowners[i-1]; 2709 } 2710 rstart = rowners[rank]; 2711 rend = rowners[rank+1]; 2712 2713 /* distribute row lengths to all processors */ 2714 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2715 if (!rank) { 2716 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2717 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2718 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2719 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2720 for (j=0; j<m; j++) { 2721 procsnz[0] += ourlens[j]; 2722 } 2723 for (i=1; i<size; i++) { 2724 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2725 /* calculate the number of nonzeros on each processor */ 2726 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2727 procsnz[i] += rowlengths[j]; 2728 } 2729 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2730 } 2731 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2732 } else { 2733 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2734 } 2735 2736 if (!rank) { 2737 /* determine max buffer needed and allocate it */ 2738 maxnz = 0; 2739 for (i=0; i<size; i++) { 2740 maxnz = PetscMax(maxnz,procsnz[i]); 2741 } 2742 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2743 2744 /* read in my part of the matrix column indices */ 2745 nz = procsnz[0]; 2746 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2747 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2748 2749 /* read in every one elses and ship off */ 2750 for (i=1; i<size; i++) { 2751 nz = procsnz[i]; 2752 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2753 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2754 } 2755 ierr = PetscFree(cols);CHKERRQ(ierr); 2756 } else { 2757 /* 
determine buffer space needed for message */ 2758 nz = 0; 2759 for (i=0; i<m; i++) { 2760 nz += ourlens[i]; 2761 } 2762 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2763 2764 /* receive message of column indices*/ 2765 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2766 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2767 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2768 } 2769 2770 /* determine column ownership if matrix is not square */ 2771 if (N != M) { 2772 n = N/size + ((N % size) > rank); 2773 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2774 cstart = cend - n; 2775 } else { 2776 cstart = rstart; 2777 cend = rend; 2778 n = cend - cstart; 2779 } 2780 2781 /* loop over local rows, determining number of off diagonal entries */ 2782 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2783 jj = 0; 2784 for (i=0; i<m; i++) { 2785 for (j=0; j<ourlens[i]; j++) { 2786 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2787 jj++; 2788 } 2789 } 2790 2791 /* create our matrix */ 2792 for (i=0; i<m; i++) { 2793 ourlens[i] -= offlens[i]; 2794 } 2795 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2796 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2797 ierr = MatSetType(A,type);CHKERRQ(ierr); 2798 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2799 2800 for (i=0; i<m; i++) { 2801 ourlens[i] += offlens[i]; 2802 } 2803 2804 if (!rank) { 2805 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2806 2807 /* read in my part of the matrix numerical values */ 2808 nz = procsnz[0]; 2809 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2810 2811 /* insert into matrix */ 2812 jj = rstart; 2813 smycols = mycols; 2814 svals = vals; 2815 for (i=0; i<m; i++) { 2816 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2817 smycols += ourlens[i]; 2818 svals += ourlens[i]; 2819 jj++; 2820 } 2821 2822 /* read in other processors and ship out */ 2823 for (i=1; i<size; i++) { 2824 nz = procsnz[i]; 2825 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2826 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2827 } 2828 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2829 } else { 2830 /* receive numeric values */ 2831 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2832 2833 /* receive message of values*/ 2834 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2835 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2836 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2837 2838 /* insert into matrix */ 2839 jj = rstart; 2840 smycols = mycols; 2841 svals = vals; 2842 for (i=0; i<m; i++) { 2843 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2844 smycols += ourlens[i]; 2845 svals += ourlens[i]; 2846 jj++; 2847 } 2848 } 2849 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2850 ierr = PetscFree(vals);CHKERRQ(ierr); 2851 ierr = PetscFree(mycols);CHKERRQ(ierr); 2852 ierr = PetscFree(rowners);CHKERRQ(ierr); 2853 2854 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2856 *newmat = A; 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2862 /* 2863 Not great since it makes two copies of the 
submatrix, first an SeqAIJ 2864 in local and then by concatenating the local matrices the end result. 2865 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2866 */ 2867 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2868 { 2869 PetscErrorCode ierr; 2870 PetscMPIInt rank,size; 2871 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2872 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2873 Mat *local,M,Mreuse; 2874 MatScalar *vwork,*aa; 2875 MPI_Comm comm = ((PetscObject)mat)->comm; 2876 Mat_SeqAIJ *aij; 2877 2878 2879 PetscFunctionBegin; 2880 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2881 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2882 2883 if (call == MAT_REUSE_MATRIX) { 2884 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2885 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2886 local = &Mreuse; 2887 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2888 } else { 2889 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2890 Mreuse = *local; 2891 ierr = PetscFree(local);CHKERRQ(ierr); 2892 } 2893 2894 /* 2895 m - number of local rows 2896 n - number of columns (same on all processors) 2897 rstart - first row in new global matrix generated 2898 */ 2899 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2900 if (call == MAT_INITIAL_MATRIX) { 2901 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2902 ii = aij->i; 2903 jj = aij->j; 2904 2905 /* 2906 Determine the number of non-zeros in the diagonal and off-diagonal 2907 portions of the matrix in order to do correct preallocation 2908 */ 2909 2910 /* first get start and end of "diagonal" columns */ 2911 if (csize == PETSC_DECIDE) { 2912 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2913 if (mglobal == n) { /* square matrix */ 2914 nlocal = m; 2915 } else { 2916 nlocal = n/size + ((n % size) > rank); 2917 } 2918 } else { 2919 nlocal = csize; 2920 } 2921 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2922 rstart = rend - nlocal; 2923 if (rank == size - 1 && rend != n) { 2924 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2925 } 2926 2927 /* next, compute all the lengths */ 2928 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2929 olens = dlens + m; 2930 for (i=0; i<m; i++) { 2931 jend = ii[i+1] - ii[i]; 2932 olen = 0; 2933 dlen = 0; 2934 for (j=0; j<jend; j++) { 2935 if (*jj < rstart || *jj >= rend) olen++; 2936 else dlen++; 2937 jj++; 2938 } 2939 olens[i] = olen; 2940 dlens[i] = dlen; 2941 } 2942 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2943 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2944 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2945 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2946 ierr = PetscFree(dlens);CHKERRQ(ierr); 2947 } else { 2948 PetscInt ml,nl; 2949 2950 M = *newmat; 2951 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2952 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2953 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2954 /* 2955 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2956 rather than the slower MatSetValues(). 
  */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  m      = B->rmap.n;
  cstart = B->cmap.rstart;
  cend   = B->cmap.rend;
  rstart = B->rmap.rstart;

  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

#if defined(PETSC_USE_DEBUG)
  /* sanity checks on the CSR input: row counts nonnegative, column indices in range and sorted */
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
    for (j=1; j<nnz; j++) {
      if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at location %D in its column indices",i,j);
    }
  }
#endif

  /* count, for each local row, the entries inside the diagonal column range [cstart,cend) and those outside it */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the entries of i are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

       The column indices for each row MUST be sorted.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3144 The size of this array is equal to the number of local rows, i.e 'm'. 3145 You must leave room for the diagonal entry even if it is zero. 3146 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3147 submatrix (same value is used for all local rows). 3148 - o_nnz - array containing the number of nonzeros in the various rows of the 3149 OFF-DIAGONAL portion of the local submatrix (possibly different for 3150 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3151 structure. The size of this array is equal to the number 3152 of local rows, i.e 'm'. 3153 3154 If the *_nnz parameter is given then the *_nz parameter is ignored 3155 3156 The AIJ format (also called the Yale sparse matrix format or 3157 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3158 storage. The stored row and column indices begin with zero. See the users manual for details. 3159 3160 The parallel matrix is partitioned such that the first m0 rows belong to 3161 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3162 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3163 3164 The DIAGONAL portion of the local submatrix of a processor can be defined 3165 as the submatrix which is obtained by extraction the part corresponding 3166 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3167 first row that belongs to the processor, and r2 is the last row belonging 3168 to the this processor. This is a square mxm matrix. The remaining portion 3169 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3170 3171 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3172 3173 You can call MatGetInfo() to get information on how effective the preallocation was; 3174 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3175 You can also run with the option -info and look for messages with the string 3176 malloc in them to see if additional memory allocation was needed. 3177 3178 Example usage: 3179 3180 Consider the following 8x8 matrix with 34 non-zero values, that is 3181 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3182 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3183 as follows: 3184 3185 .vb 3186 1 2 0 | 0 3 0 | 0 4 3187 Proc0 0 5 6 | 7 0 0 | 8 0 3188 9 0 10 | 11 0 0 | 12 0 3189 ------------------------------------- 3190 13 0 14 | 15 16 17 | 0 0 3191 Proc1 0 18 0 | 19 20 21 | 0 0 3192 0 0 0 | 22 23 0 | 24 0 3193 ------------------------------------- 3194 Proc2 25 26 27 | 0 0 28 | 29 0 3195 30 0 0 | 31 32 33 | 0 34 3196 .ve 3197 3198 This can be represented as a collection of submatrices as: 3199 3200 .vb 3201 A B C 3202 D E F 3203 G H I 3204 .ve 3205 3206 Where the submatrices A,B,C are owned by proc0, D,E,F are 3207 owned by proc1, G,H,I are owned by proc2. 3208 3209 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3210 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3211 The 'M','N' parameters are 8,8, and have the same values on all procs. 3212 3213 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3214 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3215 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3216 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3217 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3218 matrix, ans [DF] as another SeqAIJ matrix. 
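   As a minimal sketch only (error checking omitted; A, m, n, M, N and the d_nnz/o_nnz
   arrays stand for whatever the application computes for its own layout), this routine
   normally appears in the standard creation sequence:
.vb
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve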
3219 3220 When d_nz, o_nz parameters are specified, d_nz storage elements are 3221 allocated for every row of the local diagonal submatrix, and o_nz 3222 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3223 One way to choose d_nz and o_nz is to use the max nonzerors per local 3224 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3225 In this case, the values of d_nz,o_nz are: 3226 .vb 3227 proc0 : dnz = 2, o_nz = 2 3228 proc1 : dnz = 3, o_nz = 2 3229 proc2 : dnz = 1, o_nz = 4 3230 .ve 3231 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3232 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3233 for proc3. i.e we are using 12+15+10=37 storage locations to store 3234 34 values. 3235 3236 When d_nnz, o_nnz parameters are specified, the storage is specified 3237 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3238 In the above case the values for d_nnz,o_nnz are: 3239 .vb 3240 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3241 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3242 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3243 .ve 3244 Here the space allocated is sum of all the above values i.e 34, and 3245 hence pre-allocation is perfect. 3246 3247 Level: intermediate 3248 3249 .keywords: matrix, aij, compressed row, sparse, parallel 3250 3251 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3252 MPIAIJ, MatGetInfo() 3253 @*/ 3254 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3255 { 3256 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3257 3258 PetscFunctionBegin; 3259 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3260 if (f) { 3261 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3262 } 3263 PetscFunctionReturn(0); 3264 } 3265 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3268 /*@ 3269 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3270 CSR format the local rows. 3271 3272 Collective on MPI_Comm 3273 3274 Input Parameters: 3275 + comm - MPI communicator 3276 . m - number of local rows (Cannot be PETSC_DECIDE) 3277 . n - This value should be the same as the local size used in creating the 3278 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3279 calculated if N is given) For square matrices n is almost always m. 3280 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3281 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3282 . i - row indices 3283 . j - column indices 3284 - a - matrix values 3285 3286 Output Parameter: 3287 . mat - the matrix 3288 3289 Level: intermediate 3290 3291 Notes: 3292 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3293 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3294 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3295 3296 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3297 3298 The format which is used for the sparse matrix input, is equivalent to a 3299 row-major ordering.. 
i.e for the following matrix, the input data expected is 3300 as shown: 3301 3302 1 0 0 3303 2 0 3 P0 3304 ------- 3305 4 5 6 P1 3306 3307 Process0 [P0]: rows_owned=[0,1] 3308 i = {0,1,3} [size = nrow+1 = 2+1] 3309 j = {0,0,2} [size = nz = 6] 3310 v = {1,2,3} [size = nz = 6] 3311 3312 Process1 [P1]: rows_owned=[2] 3313 i = {0,3} [size = nrow+1 = 1+1] 3314 j = {0,1,2} [size = nz = 6] 3315 v = {4,5,6} [size = nz = 6] 3316 3317 .keywords: matrix, aij, compressed row, sparse, parallel 3318 3319 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3320 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3321 @*/ 3322 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3323 { 3324 PetscErrorCode ierr; 3325 3326 PetscFunctionBegin; 3327 if (i[0]) { 3328 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3329 } 3330 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3331 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3332 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3333 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3334 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3335 PetscFunctionReturn(0); 3336 } 3337 3338 #undef __FUNCT__ 3339 #define __FUNCT__ "MatCreateMPIAIJ" 3340 /*@C 3341 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3342 (the default parallel PETSc format). For good matrix assembly performance 3343 the user should preallocate the matrix storage by setting the parameters 3344 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3345 performance can be increased by more than a factor of 50. 3346 3347 Collective on MPI_Comm 3348 3349 Input Parameters: 3350 + comm - MPI communicator 3351 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3352 This value should be the same as the local size used in creating the 3353 y vector for the matrix-vector product y = Ax. 3354 . n - This value should be the same as the local size used in creating the 3355 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3356 calculated if N is given) For square matrices n is almost always m. 3357 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3358 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3359 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3360 (same value is used for all local rows) 3361 . d_nnz - array containing the number of nonzeros in the various rows of the 3362 DIAGONAL portion of the local submatrix (possibly different for each row) 3363 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3364 The size of this array is equal to the number of local rows, i.e 'm'. 3365 You must leave room for the diagonal entry even if it is zero. 3366 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3367 submatrix (same value is used for all local rows). 3368 - o_nnz - array containing the number of nonzeros in the various rows of the 3369 OFF-DIAGONAL portion of the local submatrix (possibly different for 3370 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3371 structure. The size of this array is equal to the number 3372 of local rows, i.e 'm'. 
3373
3374 Output Parameter:
3375 . A - the matrix
3376
3377 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3378 MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3379 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3380 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3381
3382 Notes:
3383 If the *_nnz parameter is given then the *_nz parameter is ignored
3384
3385 m,n,M,N parameters specify the size of the matrix, and its partitioning across
3386 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3387 storage requirements for this matrix.
3388
3389 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3390 processor then it must be used on all processors that share the object for
3391 that argument.
3392
3393 The user MUST specify either the local or global matrix dimensions
3394 (possibly both).
3395
3396 The parallel matrix is partitioned across processors such that the
3397 first m0 rows belong to process 0, the next m1 rows belong to
3398 process 1, the next m2 rows belong to process 2 etc.. where
3399 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3400 values corresponding to [m x N] submatrix.
3401
3402 The columns are logically partitioned with the n0 columns belonging
3403 to 0th partition, the next n1 columns belonging to the next
3404 partition etc.. where n0,n1,n2... are the input parameter 'n'.
3405
3406 The DIAGONAL portion of the local submatrix on any given processor
3407 is the submatrix corresponding to the rows and columns m,n
3408 owned by the given processor. i.e diagonal matrix on
3409 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3410 etc. The remaining portion of the local submatrix [m x (N-n)]
3411 constitutes the OFF-DIAGONAL portion. The example below better
3412 illustrates this concept.
3413
3414 For a square global matrix we define each processor's diagonal portion
3415 to be its local rows and the corresponding columns (a square submatrix);
3416 each processor's off-diagonal portion encompasses the remainder of the
3417 local matrix (a rectangular submatrix).
3418
3419 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3420
3421 When calling this routine with a single process communicator, a matrix of
3422 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3423 type of communicator, use the construction mechanism:
3424 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3425
3426 By default, this format uses inodes (identical nodes) when possible.
3427 We search for consecutive rows with the same nonzero structure, thereby
3428 reusing matrix information to achieve increased efficiency.
3429
3430 Options Database Keys:
3431 + -mat_no_inode - Do not use inodes
3432 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3433 - -mat_aij_oneindex - Internally use indexing starting at 1
3434 rather than 0. Note that when calling MatSetValues(),
3435 the user still MUST index entries starting at 0!
3436
3437
3438 Example usage:
3439
3440 Consider the following 8x8 matrix with 34 non-zero values, that is
3441 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3442 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
3443 as follows:
3444
3445 .vb
3446 1 2 0 | 0 3 0 | 0 4
3447 Proc0 0 5 6 | 7 0 0 | 8 0
3448 9 0 10 | 11 0 0 | 12 0
3449 -------------------------------------
3450 13 0 14 | 15 16 17 | 0 0
3451 Proc1 0 18 0 | 19 20 21 | 0 0
3452 0 0 0 | 22 23 0 | 24 0
3453 -------------------------------------
3454 Proc2 25 26 27 | 0 0 28 | 29 0
3455 30 0 0 | 31 32 33 | 0 34
3456 .ve
3457
3458 This can be represented as a collection of submatrices as:
3459
3460 .vb
3461 A B C
3462 D E F
3463 G H I
3464 .ve
3465
3466 Where the submatrices A,B,C are owned by proc0, D,E,F are
3467 owned by proc1, G,H,I are owned by proc2.
3468
3469 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3470 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3471 The 'M','N' parameters are 8,8, and have the same values on all procs.
3472
3473 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3474 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3475 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3476 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3477 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3478 matrix, and [DF] as another SeqAIJ matrix.
3479
3480 When d_nz, o_nz parameters are specified, d_nz storage elements are
3481 allocated for every row of the local diagonal submatrix, and o_nz
3482 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3483 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3484 local row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3485 In this case, the values of d_nz,o_nz are:
3486 .vb
3487 proc0 : d_nz = 2, o_nz = 2
3488 proc1 : d_nz = 3, o_nz = 2
3489 proc2 : d_nz = 1, o_nz = 4
3490 .ve
3491 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3492 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3493 for proc2. i.e we are using 12+15+10=37 storage locations to store
3494 34 values.
3495
3496 When d_nnz, o_nnz parameters are specified, the storage is specified
3497 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3498 In the above case the values for d_nnz,o_nnz are:
3499 .vb
3500 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3501 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3502 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3503 .ve
3504 Here the space allocated is the sum of all the above values, i.e. 34, and
3505 hence pre-allocation is perfect.
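   As a minimal sketch of the example above (only proc0's call is shown; each process
   passes its own m, n, d_nnz and o_nnz, and the MatSetValues()/assembly calls are
   omitted), the creation call would look like:
.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};  /* proc0 values from above */
      Mat      A;
      MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve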
3506 3507 Level: intermediate 3508 3509 .keywords: matrix, aij, compressed row, sparse, parallel 3510 3511 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3512 MPIAIJ, MatCreateMPIAIJWithArrays() 3513 @*/ 3514 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3515 { 3516 PetscErrorCode ierr; 3517 PetscMPIInt size; 3518 3519 PetscFunctionBegin; 3520 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3521 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3522 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3523 if (size > 1) { 3524 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3525 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3526 } else { 3527 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3528 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3529 } 3530 PetscFunctionReturn(0); 3531 } 3532 3533 #undef __FUNCT__ 3534 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3535 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3536 { 3537 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3538 3539 PetscFunctionBegin; 3540 *Ad = a->A; 3541 *Ao = a->B; 3542 *colmap = a->garray; 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3548 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3549 { 3550 PetscErrorCode ierr; 3551 PetscInt i; 3552 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3553 3554 PetscFunctionBegin; 3555 if (coloring->ctype == IS_COLORING_GLOBAL) { 3556 ISColoringValue *allcolors,*colors; 3557 ISColoring ocoloring; 3558 3559 /* set coloring for diagonal portion */ 3560 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3561 3562 /* set coloring for off-diagonal portion */ 3563 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3564 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3565 for (i=0; i<a->B->cmap.n; i++) { 3566 colors[i] = allcolors[a->garray[i]]; 3567 } 3568 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3569 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3570 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3571 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3572 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3573 ISColoringValue *colors; 3574 PetscInt *larray; 3575 ISColoring ocoloring; 3576 3577 /* set coloring for diagonal portion */ 3578 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3579 for (i=0; i<a->A->cmap.n; i++) { 3580 larray[i] = i + A->cmap.rstart; 3581 } 3582 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3583 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3584 for (i=0; i<a->A->cmap.n; i++) { 3585 colors[i] = coloring->colors[larray[i]]; 3586 } 3587 ierr = PetscFree(larray);CHKERRQ(ierr); 3588 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3589 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3590 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3591 3592 /* set coloring for off-diagonal portion */ 3593 ierr = 
PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3594 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3595 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3596 for (i=0; i<a->B->cmap.n; i++) {
3597 colors[i] = coloring->colors[larray[i]];
3598 }
3599 ierr = PetscFree(larray);CHKERRQ(ierr);
3600 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3601 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3602 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3603 } else {
3604 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3605 }
3606
3607 PetscFunctionReturn(0);
3608 }
3609
3610 #if defined(PETSC_HAVE_ADIC)
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3613 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3614 {
3615 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3616 PetscErrorCode ierr;
3617
3618 PetscFunctionBegin;
3619 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3620 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3621 PetscFunctionReturn(0);
3622 }
3623 #endif
3624
3625 #undef __FUNCT__
3626 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3627 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3628 {
3629 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3630 PetscErrorCode ierr;
3631
3632 PetscFunctionBegin;
3633 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3634 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3635 PetscFunctionReturn(0);
3636 }
3637
3638 #undef __FUNCT__
3639 #define __FUNCT__ "MatMerge"
3640 /*@
3641 MatMerge - Creates a single large PETSc matrix by concatenating sequential
3642 matrices from each processor
3643
3644 Collective on MPI_Comm
3645
3646 Input Parameters:
3647 + comm - the communicator the parallel matrix will live on
3648 . inmat - the input sequential matrix (each process provides its own)
3649 . n - number of local columns (or PETSC_DECIDE)
3650 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3651
3652 Output Parameter:
3653 . outmat - the parallel matrix generated
3654
3655 Level: advanced
3656
3657 Notes: The number of columns of the matrix in EACH processor MUST be the same.
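   A minimal usage sketch (seq, outmat, mlocal and N are placeholder names; each process
   assembles a sequential matrix holding its own rows, and MatMerge() then concatenates
   the pieces and destroys the input matrix):
.vb
      MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&seq);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() on seq ... */
      MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
      /* seq has been destroyed by MatMerge(); only outmat remains */
.ve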
3658 3659 @*/ 3660 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3661 { 3662 PetscErrorCode ierr; 3663 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3664 PetscInt *indx; 3665 PetscScalar *values; 3666 3667 PetscFunctionBegin; 3668 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3669 if (scall == MAT_INITIAL_MATRIX){ 3670 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3671 if (n == PETSC_DECIDE){ 3672 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3673 } 3674 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3675 rstart -= m; 3676 3677 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3678 for (i=0;i<m;i++) { 3679 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3680 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3681 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3682 } 3683 /* This routine will ONLY return MPIAIJ type matrix */ 3684 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3685 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3686 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3687 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3688 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3689 3690 } else if (scall == MAT_REUSE_MATRIX){ 3691 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3692 } else { 3693 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3694 } 3695 3696 for (i=0;i<m;i++) { 3697 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3698 Ii = i + rstart; 3699 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3700 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3701 } 3702 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3703 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3704 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3705 3706 PetscFunctionReturn(0); 3707 } 3708 3709 #undef __FUNCT__ 3710 #define __FUNCT__ "MatFileSplit" 3711 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3712 { 3713 PetscErrorCode ierr; 3714 PetscMPIInt rank; 3715 PetscInt m,N,i,rstart,nnz; 3716 size_t len; 3717 const PetscInt *indx; 3718 PetscViewer out; 3719 char *name; 3720 Mat B; 3721 const PetscScalar *values; 3722 3723 PetscFunctionBegin; 3724 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3725 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3726 /* Should this be the type of the diagonal block of A? 
*/ 3727 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3728 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3729 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3730 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3731 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3732 for (i=0;i<m;i++) { 3733 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3734 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3735 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3736 } 3737 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3738 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3739 3740 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3741 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3742 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3743 sprintf(name,"%s.%d",outfile,rank); 3744 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3745 ierr = PetscFree(name); 3746 ierr = MatView(B,out);CHKERRQ(ierr); 3747 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3748 ierr = MatDestroy(B);CHKERRQ(ierr); 3749 PetscFunctionReturn(0); 3750 } 3751 3752 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3753 #undef __FUNCT__ 3754 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3755 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3756 { 3757 PetscErrorCode ierr; 3758 Mat_Merge_SeqsToMPI *merge; 3759 PetscContainer container; 3760 3761 PetscFunctionBegin; 3762 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3763 if (container) { 3764 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3765 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3766 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3767 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3768 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3769 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3770 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3771 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3772 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3773 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3774 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3775 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3776 3777 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3778 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3779 } 3780 ierr = PetscFree(merge);CHKERRQ(ierr); 3781 3782 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3783 PetscFunctionReturn(0); 3784 } 3785 3786 #include "src/mat/utils/freespace.h" 3787 #include "petscbt.h" 3788 3789 #undef __FUNCT__ 3790 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3791 /*@C 3792 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3793 matrices from each processor 3794 3795 Collective on MPI_Comm 3796 3797 Input Parameters: 3798 + comm - the communicators the parallel matrix will live on 3799 . seqmat - the input sequential matrices 3800 . m - number of local rows (or PETSC_DECIDE) 3801 . n - number of local columns (or PETSC_DECIDE) 3802 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3803 3804 Output Parameter: 3805 . mpimat - the parallel matrix generated 3806 3807 Level: advanced 3808 3809 Notes: 3810 The dimensions of the sequential matrix in each processor MUST be the same. 3811 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3812 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 3813 @*/ 3814 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3815 { 3816 PetscErrorCode ierr; 3817 MPI_Comm comm=((PetscObject)mpimat)->comm; 3818 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3819 PetscMPIInt size,rank,taga,*len_s; 3820 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3821 PetscInt proc,m; 3822 PetscInt **buf_ri,**buf_rj; 3823 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3824 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3825 MPI_Request *s_waits,*r_waits; 3826 MPI_Status *status; 3827 MatScalar *aa=a->a; 3828 MatScalar **abuf_r,*ba_i; 3829 Mat_Merge_SeqsToMPI *merge; 3830 PetscContainer container; 3831 3832 PetscFunctionBegin; 3833 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3834 3835 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3836 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3837 3838 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3839 if (container) { 3840 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3841 } 3842 bi = merge->bi; 3843 bj = merge->bj; 3844 buf_ri = merge->buf_ri; 3845 buf_rj = merge->buf_rj; 3846 3847 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3848 owners = merge->rowmap.range; 3849 len_s = merge->len_s; 3850 3851 /* send and recv matrix values */ 3852 /*-----------------------------*/ 3853 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3854 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3855 3856 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3857 for (proc=0,k=0; proc<size; proc++){ 3858 if (!len_s[proc]) continue; 3859 i = owners[proc]; 3860 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3861 k++; 3862 } 3863 3864 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3865 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3866 ierr = PetscFree(status);CHKERRQ(ierr); 3867 3868 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3869 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3870 3871 /* insert mat values of mpimat */ 3872 /*----------------------------*/ 3873 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 3874 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3875 nextrow = buf_ri_k + merge->nrecv; 3876 nextai = nextrow + merge->nrecv; 3877 3878 for (k=0; k<merge->nrecv; k++){ 3879 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3880 nrows = *(buf_ri_k[k]); 3881 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3882 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3883 } 3884 3885 /* set values of ba */ 3886 m = merge->rowmap.n; 3887 for (i=0; i<m; i++) { 3888 arow = owners[rank] + i; 3889 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3890 bnzi = bi[i+1] - bi[i]; 3891 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3892 3893 /* add local non-zero vals of this proc's seqmat into ba */ 3894 anzi = ai[arow+1] - ai[arow]; 3895 aj = a->j + ai[arow]; 3896 aa = a->a + ai[arow]; 3897 nextaj = 0; 3898 for (j=0; nextaj<anzi; j++){ 3899 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3900 ba_i[j] += aa[nextaj++]; 3901 } 3902 } 3903 3904 /* add 
received vals into ba */ 3905 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3906 /* i-th row */ 3907 if (i == *nextrow[k]) { 3908 anzi = *(nextai[k]+1) - *nextai[k]; 3909 aj = buf_rj[k] + *(nextai[k]); 3910 aa = abuf_r[k] + *(nextai[k]); 3911 nextaj = 0; 3912 for (j=0; nextaj<anzi; j++){ 3913 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3914 ba_i[j] += aa[nextaj++]; 3915 } 3916 } 3917 nextrow[k]++; nextai[k]++; 3918 } 3919 } 3920 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3921 } 3922 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3923 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3924 3925 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3926 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3927 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3928 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3929 PetscFunctionReturn(0); 3930 } 3931 3932 #undef __FUNCT__ 3933 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3934 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3935 { 3936 PetscErrorCode ierr; 3937 Mat B_mpi; 3938 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3939 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3940 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3941 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3942 PetscInt len,proc,*dnz,*onz; 3943 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3944 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3945 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3946 MPI_Status *status; 3947 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3948 PetscBT lnkbt; 3949 Mat_Merge_SeqsToMPI *merge; 3950 PetscContainer container; 3951 3952 PetscFunctionBegin; 3953 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3954 3955 /* make sure it is a PETSc comm */ 3956 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3957 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3958 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3959 3960 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3961 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3962 3963 /* determine row ownership */ 3964 /*---------------------------------------------------------*/ 3965 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3966 merge->rowmap.n = m; 3967 merge->rowmap.N = M; 3968 merge->rowmap.bs = 1; 3969 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3970 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3971 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3972 3973 m = merge->rowmap.n; 3974 M = merge->rowmap.N; 3975 owners = merge->rowmap.range; 3976 3977 /* determine the number of messages to send, their lengths */ 3978 /*---------------------------------------------------------*/ 3979 len_s = merge->len_s; 3980 3981 len = 0; /* length of buf_si[] */ 3982 merge->nsend = 0; 3983 for (proc=0; proc<size; proc++){ 3984 len_si[proc] = 0; 3985 if (proc == rank){ 3986 len_s[proc] = 0; 3987 } else { 3988 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3989 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3990 } 3991 if (len_s[proc]) { 3992 merge->nsend++; 3993 nrows = 0; 3994 for (i=owners[proc]; i<owners[proc+1]; i++){ 3995 if (ai[i+1] > ai[i]) nrows++; 3996 } 3997 len_si[proc] = 2*(nrows+1); 3998 len += 
len_si[proc]; 3999 } 4000 } 4001 4002 /* determine the number and length of messages to receive for ij-structure */ 4003 /*-------------------------------------------------------------------------*/ 4004 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4005 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4006 4007 /* post the Irecv of j-structure */ 4008 /*-------------------------------*/ 4009 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4010 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4011 4012 /* post the Isend of j-structure */ 4013 /*--------------------------------*/ 4014 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4015 sj_waits = si_waits + merge->nsend; 4016 4017 for (proc=0, k=0; proc<size; proc++){ 4018 if (!len_s[proc]) continue; 4019 i = owners[proc]; 4020 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4021 k++; 4022 } 4023 4024 /* receives and sends of j-structure are complete */ 4025 /*------------------------------------------------*/ 4026 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4027 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4028 4029 /* send and recv i-structure */ 4030 /*---------------------------*/ 4031 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4032 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4033 4034 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4035 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4036 for (proc=0,k=0; proc<size; proc++){ 4037 if (!len_s[proc]) continue; 4038 /* form outgoing message for i-structure: 4039 buf_si[0]: nrows to be sent 4040 [1:nrows]: row index (global) 4041 [nrows+1:2*nrows+1]: i-structure index 4042 */ 4043 /*-------------------------------------------*/ 4044 nrows = len_si[proc]/2 - 1; 4045 buf_si_i = buf_si + nrows+1; 4046 buf_si[0] = nrows; 4047 buf_si_i[0] = 0; 4048 nrows = 0; 4049 for (i=owners[proc]; i<owners[proc+1]; i++){ 4050 anzi = ai[i+1] - ai[i]; 4051 if (anzi) { 4052 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4053 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4054 nrows++; 4055 } 4056 } 4057 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4058 k++; 4059 buf_si += len_si[proc]; 4060 } 4061 4062 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4063 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4064 4065 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4066 for (i=0; i<merge->nrecv; i++){ 4067 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4068 } 4069 4070 ierr = PetscFree(len_si);CHKERRQ(ierr); 4071 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4072 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4073 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4074 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4075 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4076 ierr = PetscFree(status);CHKERRQ(ierr); 4077 4078 /* compute a local seq matrix in each processor */ 4079 /*----------------------------------------------*/ 4080 /* allocate bi array and 
free space for accumulating nonzero column info */
4081 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4082 bi[0] = 0;
4083
4084 /* create and initialize a linked list */
4085 nlnk = N+1;
4086 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4087
4088 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4089 len = 0;
4090 len = ai[owners[rank+1]] - ai[owners[rank]];
4091 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4092 current_space = free_space;
4093
4094 /* determine symbolic info for each local row */
4095 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4096 nextrow = buf_ri_k + merge->nrecv;
4097 nextai = nextrow + merge->nrecv;
4098 for (k=0; k<merge->nrecv; k++){
4099 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4100 nrows = *buf_ri_k[k];
4101 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4102 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4103 }
4104
4105 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4106 len = 0;
4107 for (i=0;i<m;i++) {
4108 bnzi = 0;
4109 /* add local non-zero cols of this proc's seqmat into lnk */
4110 arow = owners[rank] + i;
4111 anzi = ai[arow+1] - ai[arow];
4112 aj = a->j + ai[arow];
4113 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4114 bnzi += nlnk;
4115 /* add received col data into lnk */
4116 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4117 if (i == *nextrow[k]) { /* i-th row */
4118 anzi = *(nextai[k]+1) - *nextai[k];
4119 aj = buf_rj[k] + *nextai[k];
4120 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4121 bnzi += nlnk;
4122 nextrow[k]++; nextai[k]++;
4123 }
4124 }
4125 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4126
4127 /* if free space is not available, make more free space */
4128 if (current_space->local_remaining<bnzi) {
4129 ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4130 nspacedouble++;
4131 }
4132 /* copy data into free space, then initialize lnk */
4133 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4134 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4135
4136 current_space->array += bnzi;
4137 current_space->local_used += bnzi;
4138 current_space->local_remaining -= bnzi;
4139
4140 bi[i+1] = bi[i] + bnzi;
4141 }
4142
4143 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4144
4145 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4146 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4147 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4148
4149 /* create symbolic parallel matrix B_mpi */
4150 /*---------------------------------------*/
4151 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4152 if (n==PETSC_DECIDE) {
4153 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4154 } else {
4155 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4156 }
4157 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4158 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4159 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4160
4161 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4162 B_mpi->assembled = PETSC_FALSE;
4163 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4164 merge->bi = bi;
4165 merge->bj = bj;
4166 merge->buf_ri = buf_ri;
4167 merge->buf_rj = buf_rj;
4168 merge->coi = PETSC_NULL; 4169 merge->coj = PETSC_NULL; 4170 merge->owners_co = PETSC_NULL; 4171 4172 /* attach the supporting struct to B_mpi for reuse */ 4173 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4174 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4175 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4176 *mpimat = B_mpi; 4177 4178 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4179 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4180 PetscFunctionReturn(0); 4181 } 4182 4183 #undef __FUNCT__ 4184 #define __FUNCT__ "MatMerge_SeqsToMPI" 4185 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4186 { 4187 PetscErrorCode ierr; 4188 4189 PetscFunctionBegin; 4190 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4191 if (scall == MAT_INITIAL_MATRIX){ 4192 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4193 } 4194 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4195 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4196 PetscFunctionReturn(0); 4197 } 4198 4199 #undef __FUNCT__ 4200 #define __FUNCT__ "MatGetLocalMat" 4201 /*@ 4202 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4203 4204 Not Collective 4205 4206 Input Parameters: 4207 + A - the matrix 4208 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4209 4210 Output Parameter: 4211 . A_loc - the local sequential matrix generated 4212 4213 Level: developer 4214 4215 @*/ 4216 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4217 { 4218 PetscErrorCode ierr; 4219 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4220 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4221 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4222 MatScalar *aa=a->a,*ba=b->a,*cam; 4223 PetscScalar *ca; 4224 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 4225 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4226 4227 PetscFunctionBegin; 4228 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4229 if (scall == MAT_INITIAL_MATRIX){ 4230 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4231 ci[0] = 0; 4232 for (i=0; i<am; i++){ 4233 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4234 } 4235 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4236 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4237 k = 0; 4238 for (i=0; i<am; i++) { 4239 ncols_o = bi[i+1] - bi[i]; 4240 ncols_d = ai[i+1] - ai[i]; 4241 /* off-diagonal portion of A */ 4242 for (jo=0; jo<ncols_o; jo++) { 4243 col = cmap[*bj]; 4244 if (col >= cstart) break; 4245 cj[k] = col; bj++; 4246 ca[k++] = *ba++; 4247 } 4248 /* diagonal portion of A */ 4249 for (j=0; j<ncols_d; j++) { 4250 cj[k] = cstart + *aj++; 4251 ca[k++] = *aa++; 4252 } 4253 /* off-diagonal portion of A */ 4254 for (j=jo; j<ncols_o; j++) { 4255 cj[k] = cmap[*bj++]; 4256 ca[k++] = *ba++; 4257 } 4258 } 4259 /* put together the new matrix */ 4260 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4261 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4262 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4263 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4264 mat->free_a = PETSC_TRUE; 4265 mat->free_ij = PETSC_TRUE; 4266 mat->nonew = 0; 4267 } else if (scall == MAT_REUSE_MATRIX){ 4268 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4269 ci = mat->i; cj = mat->j; cam = mat->a; 4270 for (i=0; i<am; i++) { 4271 /* off-diagonal portion of A */ 4272 ncols_o = bi[i+1] - bi[i]; 4273 for (jo=0; jo<ncols_o; jo++) { 4274 col = cmap[*bj]; 4275 if (col >= cstart) break; 4276 *cam++ = *ba++; bj++; 4277 } 4278 /* diagonal portion of A */ 4279 ncols_d = ai[i+1] - ai[i]; 4280 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4281 /* off-diagonal portion of A */ 4282 for (j=jo; j<ncols_o; j++) { 4283 *cam++ = *ba++; bj++; 4284 } 4285 } 4286 } else { 4287 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4288 } 4289 4290 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4291 PetscFunctionReturn(0); 4292 } 4293 4294 #undef __FUNCT__ 4295 #define __FUNCT__ "MatGetLocalMatCondensed" 4296 /*@C 4297 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4298 4299 Not Collective 4300 4301 Input Parameters: 4302 + A - the matrix 4303 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4304 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4305 4306 Output Parameter: 4307 . A_loc - the local sequential matrix generated 4308 4309 Level: developer 4310 4311 @*/ 4312 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4313 { 4314 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4315 PetscErrorCode ierr; 4316 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4317 IS isrowa,iscola; 4318 Mat *aloc; 4319 4320 PetscFunctionBegin; 4321 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4322 if (!row){ 4323 start = A->rmap.rstart; end = A->rmap.rend; 4324 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4325 } else { 4326 isrowa = *row; 4327 } 4328 if (!col){ 4329 start = A->cmap.rstart; 4330 cmap = a->garray; 4331 nzA = a->A->cmap.n; 4332 nzB = a->B->cmap.n; 4333 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4334 ncols = 0; 4335 for (i=0; i<nzB; i++) { 4336 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4337 else break; 4338 } 4339 imark = i; 4340 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4341 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4342 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4343 ierr = PetscFree(idx);CHKERRQ(ierr); 4344 } else { 4345 iscola = *col; 4346 } 4347 if (scall != MAT_INITIAL_MATRIX){ 4348 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4349 aloc[0] = *A_loc; 4350 } 4351 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4352 *A_loc = aloc[0]; 4353 ierr = PetscFree(aloc);CHKERRQ(ierr); 4354 if (!row){ 4355 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4356 } 4357 if (!col){ 4358 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4359 } 4360 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4361 PetscFunctionReturn(0); 4362 } 4363 4364 #undef __FUNCT__ 4365 #define __FUNCT__ "MatGetBrowsOfAcols" 4366 /*@C 4367 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4368 4369 Collective on Mat 4370 4371 Input Parameters: 4372 + A,B - the matrices in mpiaij format 4373 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4374 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4375 4376 Output Parameter: 4377 + rowb, colb - index sets of rows and columns of B to extract 4378 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4379 - B_seq - the sequential matrix generated 4380 4381 Level: developer 4382 4383 @*/ 4384 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4385 { 4386 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4387 PetscErrorCode ierr; 4388 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4389 IS isrowb,iscolb; 4390 Mat *bseq; 4391 4392 PetscFunctionBegin; 4393 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4394 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4395 } 4396 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4397 4398 if (scall == MAT_INITIAL_MATRIX){ 4399 start = A->cmap.rstart; 4400 cmap = a->garray; 4401 nzA = a->A->cmap.n; 4402 nzB = a->B->cmap.n; 4403 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4404 ncols = 0; 4405 for (i=0; i<nzB; i++) { /* row < local row index */ 4406 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4407 else break; 4408 } 4409 imark = i; 4410 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4411 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4412 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4413 ierr = PetscFree(idx);CHKERRQ(ierr); 4414 *brstart = imark; 4415 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4416 } else { 4417 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4418 isrowb = *rowb; iscolb = *colb; 4419 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4420 bseq[0] = *B_seq; 4421 } 4422 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4423 *B_seq = bseq[0]; 4424 ierr = PetscFree(bseq);CHKERRQ(ierr); 4425 if (!rowb){ 4426 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4427 } else { 4428 *rowb = isrowb; 4429 } 4430 if (!colb){ 4431 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4432 } else { 4433 *colb = iscolb; 4434 } 4435 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4436 PetscFunctionReturn(0); 4437 } 4438 4439 #undef __FUNCT__ 4440 #define __FUNCT__ "MatGetBrowsOfAoCols" 4441 /*@C 4442 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4443 of the OFF-DIAGONAL portion of local A 4444 4445 Collective on Mat 4446 4447 Input Parameters: 4448 + A,B - the matrices in mpiaij format 4449 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4450 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4451 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4452 4453 Output Parameter: 4454 + B_oth - the sequential matrix generated 4455 4456 Level: developer 4457 4458 @*/ 4459 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4460 { 4461 VecScatter_MPI_General *gen_to,*gen_from; 4462 PetscErrorCode ierr; 4463 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4464 Mat_SeqAIJ *b_oth; 4465 VecScatter ctx=a->Mvctx; 4466 MPI_Comm comm=((PetscObject)ctx)->comm; 4467 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4468 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4469 PetscScalar *rvalues,*svalues; 4470 MatScalar *b_otha,*bufa,*bufA; 4471 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4472 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4473 MPI_Status *sstatus,rstatus; 4474 PetscMPIInt jj; 4475 PetscInt *cols,sbs,rbs; 4476 PetscScalar *vals; 4477 4478 PetscFunctionBegin; 4479 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4480 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4481 } 4482 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4483 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4484 4485 gen_to = (VecScatter_MPI_General*)ctx->todata; 4486 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4487 rvalues = gen_from->values; /* holds the length of receiving row */ 4488 svalues = gen_to->values; /* holds the length of sending row */ 4489 nrecvs = gen_from->n; 4490 nsends = gen_to->n; 4491 4492 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4493 srow = gen_to->indices; /* local row index to be sent */ 4494 sstarts = gen_to->starts; 4495 sprocs = gen_to->procs; 4496 sstatus = gen_to->sstatus; 4497 sbs = gen_to->bs; 4498 rstarts = gen_from->starts; 4499 rprocs = gen_from->procs; 4500 rbs = gen_from->bs; 4501 4502 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4503 if (scall == MAT_INITIAL_MATRIX){ 4504 /* i-array */ 4505 /*---------*/ 4506 /* post receives */ 4507 for (i=0; i<nrecvs; i++){ 4508 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4509 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4510 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4511 } 4512 4513 /* pack the outgoing message */ 4514 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4515 rstartsj = sstartsj + nsends +1; 4516 sstartsj[0] = 0; rstartsj[0] = 0; 4517 len = 0; /* total length of j or a array to be sent */ 4518 k = 0; 4519 for (i=0; i<nsends; i++){ 4520 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4521 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4522 for (j=0; j<nrows; j++) { 4523 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4524 for (l=0; l<sbs; l++){ 4525 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4526 rowlen[j*sbs+l] = ncols; 4527 len += ncols; 4528 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4529 } 4530 k++; 4531 } 4532 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4533 sstartsj[i+1] = len; 
/* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4534 } 4535 /* recvs and sends of i-array are completed */ 4536 i = nrecvs; 4537 while (i--) { 4538 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4539 } 4540 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4541 4542 /* allocate buffers for sending j and a arrays */ 4543 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4544 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4545 4546 /* create i-array of B_oth */ 4547 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4548 b_othi[0] = 0; 4549 len = 0; /* total length of j or a array to be received */ 4550 k = 0; 4551 for (i=0; i<nrecvs; i++){ 4552 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4553 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4554 for (j=0; j<nrows; j++) { 4555 b_othi[k+1] = b_othi[k] + rowlen[j]; 4556 len += rowlen[j]; k++; 4557 } 4558 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4559 } 4560 4561 /* allocate space for j and a arrrays of B_oth */ 4562 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4563 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4564 4565 /* j-array */ 4566 /*---------*/ 4567 /* post receives of j-array */ 4568 for (i=0; i<nrecvs; i++){ 4569 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4570 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4571 } 4572 4573 /* pack the outgoing message j-array */ 4574 k = 0; 4575 for (i=0; i<nsends; i++){ 4576 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4577 bufJ = bufj+sstartsj[i]; 4578 for (j=0; j<nrows; j++) { 4579 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4580 for (ll=0; ll<sbs; ll++){ 4581 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4582 for (l=0; l<ncols; l++){ 4583 *bufJ++ = cols[l]; 4584 } 4585 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4586 } 4587 } 4588 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4589 } 4590 4591 /* recvs and sends of j-array are completed */ 4592 i = nrecvs; 4593 while (i--) { 4594 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4595 } 4596 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4597 } else if (scall == MAT_REUSE_MATRIX){ 4598 sstartsj = *startsj; 4599 rstartsj = sstartsj + nsends +1; 4600 bufa = *bufa_ptr; 4601 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4602 b_otha = b_oth->a; 4603 } else { 4604 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4605 } 4606 4607 /* a-array */ 4608 /*---------*/ 4609 /* post receives of a-array */ 4610 for (i=0; i<nrecvs; i++){ 4611 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4612 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4613 } 4614 4615 /* pack the outgoing message a-array */ 4616 k = 0; 4617 for (i=0; i<nsends; i++){ 4618 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4619 bufA = bufa+sstartsj[i]; 4620 for (j=0; j<nrows; j++) { 4621 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4622 for (ll=0; ll<sbs; ll++){ 4623 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4624 for (l=0; l<ncols; l++){ 4625 *bufA++ = 
  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap.range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      /* caller does not keep the work arrays, so free them here */
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
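
/*
   Usage sketch (illustrative only; the variable names below are hypothetical).  A
   developer-level caller that needs, on each process, the rows of B corresponding to the
   off-diagonal columns of A (for example a parallel matrix-matrix product) might call:

      Mat       B_oth    = PETSC_NULL;
      PetscInt  *startsj = PETSC_NULL;
      MatScalar *bufa    = PETSC_NULL;

      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ... later, after the numerical values of B change but not its nonzero pattern ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ... when done ...
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
*/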

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

    Input Parameter:
.   A - The matrix in mpiaij format

    Output Parameters:
+   lvec - The local vector holding off-process values from the argument to a matrix-vector product
.   colmap - A map from global column index to local index into lvec
-   multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2)
  PetscValidPointer(colmap, 3)
  PetscValidPointer(multScatter, 4)
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
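
/*
   Example (illustrative): retrieving the internal communication objects from an assembled
   MATMPIAIJ matrix A.  The returned objects are the matrix's own internals, not copies,
   so the caller must not destroy them.

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif

      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/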

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/
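
/*
   Sketch of the usual creation sequence for a MATMPIAIJ matrix (the sizes and
   preallocation values below are placeholders):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);      (or -mat_type mpiaij with MatSetFromOptions())
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
      ... insert entries with MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/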

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor     = 0;
  B->rmap.bs    = 1;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
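
/*
   Usage sketch (illustrative; the CSR arrays are placeholders that each process supplies
   for its local "diagonal" block (i,j,a) and "off-diagonal" block (oi,oj,oa)):

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ... use A ...
      ierr = MatDestroy(A);CHKERRQ(ierr);

   Since the arrays are not copied, they must remain valid for the life of A and are still
   owned by the caller afterwards.
*/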

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt   i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt   cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  MatScalar  *ba = b->a;

  PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt   nonew = a->nonew;
  MatScalar  *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END