#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
   Distributes a SeqAIJ matrix across a set of processes. Code stolen from
   MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

   Only for square matrices

   Collective on comm. On rank 0, gmat is the full SeqAIJ matrix to scatter;
   m is this process's local row count. With MAT_INITIAL_MATRIX a new MPIAIJ
   matrix is created in *inmat; with any other reuse value only the numerical
   values are re-sent into the already-structured *inmat.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds the global matrix, so only it can type-check it */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* gather every process's local row count, then prefix-sum into ownership ranges */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts: ld[i] counts entries left of
         the diagonal block in row i, olens[i] counts all off-diagonal-block entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 sets values straight from the global matrix's own arrays */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts (same pass as rank 0 above) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-block-only count */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      /* non-root ranks allocated their own receive buffers */
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash ld so the reuse path below can split values into A/B without the indices */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each global row arrives as [left-of-diag | diag block | right-of-diag] and ld[]
       gives the left-of-diag count, so the row can be split without column indices */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diag of previous row plus left-of-diag of this row are contiguous in B */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diag piece of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;   /* number of local off-diagonal columns */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table maps global column+1 -> local column+1 (shift by one so 0 means "absent") */
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length N: colmap[global] = local+1, zero means "not present" */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15

/*
   Inserts/adds (row,col,value) into the diagonal block A inside MatSetValues_MPIAIJ().
   Binary-searches row's existing entries (warm-started via lastcol1), updates in place
   when found, otherwise shifts later entries up and inserts; reallocates if the row
   is full.  Relies on locals rp1/ap1/nrow1/low1/high1/... set up by the caller.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value; \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col; \
      ap1[_i] = value; \
      a_noinsert: ; \
      ailen[row] = \
nrow1; \
}


/*
   Same insertion logic as MatSetValues_SeqAIJ_A_Private but for the
   off-diagonal block B, using the parallel rp2/ap2/nrow2/... locals.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Replaces the numerical values of one locally-owned row; v holds the full row's
   values ordered by global column (left-of-diagonal-block, diagonal block,
   right-of-diagonal-block).  The row's nonzero structure must already exist.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  /* l entries of B come before the diagonal block */
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr =
PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds a logically dense m-by-n block of values.  Locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) sequential blocks via the
   macros above; rows owned by other processes are stashed for communication at
   assembly time (unless donotstash is set).
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2
= bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* brand-new off-diagonal column: fall back to global numbering */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row owned by another process: stash for MatAssemblyBegin/End communication */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode
MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  /* Retrieves an m-by-n block of values; only rows owned by this process are
     supported.  Columns outside the diagonal block are looked up through the
     colmap; absent entries read as 0.0. */
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in the off-diagonal structure -> value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Starts communication of stashed off-process values.  Also verifies all ranks
   agree on insert-vs-add mode before any values move.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains stash messages into local blocks via
   MatSetValues_MPIAIJ(), assembles A and B, and handles collective
   disassembly bookkeeping.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled flags: zero result means someone disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zeroes all stored values (structure is kept) by zeroing both sequential blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode
MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  /* Zeroes the N (globally numbered) rows listed in rows[], optionally placing
     diag on the diagonal.  Each rank first routes the row indices it was given
     to the owning ranks (rendezvous via nonblocking sends/receives), then zaps
     its own rows locally. */
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
/* BUGFIX: was "#if defined(PETSC_DEBUG)" — the rest of this file guards debug-only
   checks with PETSC_USE_DEBUG (see MatSetValues_MPIAIJ), so the out-of-range check
   below was never compiled, even in debug builds */
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor; nprocs[2*j] is the
     count destined for rank j, nprocs[2*j+1] flags that a message is needed */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* j only restarts from 0 when the input is not sorted ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* bucket the row indices by destination rank (starts[] is consumed here...) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* ...so rebuild it before issuing the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter; convert global row numbers to local */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
    Zero the required rows.
If the "diagonal block" of the matrix
    is square and the user wishes to set the diagonal we use separate
    code so that MatSetValues() is not called for each diagonal allocating
    new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: let SeqAIJ place diag directly */
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: zero first, then insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   yy = A*xx.  Overlaps the scatter of off-process vector entries with the
   local diagonal-block multiply, then adds the off-diagonal contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  /* start communication, do local work, finish communication, add remote part */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* Applies only the local diagonal block: xx = diag(A)*bb. */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* zz = yy + A*xx, with the same communication/computation overlap as MatMult. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T*xx.  The off-diagonal transpose product is scattered back in
   reverse; the ordering of the local multiply differs depending on whether
   the scatter's begin/end are merged (see VecScatterGetMerged()).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr =
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Sets *f to whether Bmat equals Amat^T to tolerance tol.  Checks the cheap
   local condition (diagonal blocks transposes of each other) first and only
   then extracts and compares the off-diagonal submatrices.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix.
*/
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global columns outside this rank's ownership range */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  /* note the index sets are swapped for B, since we compare against B's transpose */
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* zz = yy + A^T*xx, overlapping the reverse scatter with the local transpose multiply. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a =
(Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* the diagonal can be extracted locally only when the row and column partitions coincide */
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* scale the whole matrix by aa: both the diagonal (A) and off-diagonal (B) blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* release all resources held by an MPIAIJ matrix: stash, sub-blocks, column map,
   scatter context, work arrays, and the composed function slots */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
  /* colmap is either a hash table or a dense array depending on the build configuration */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatView_MPIAIJ_Binary" 927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 928 { 929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 930 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 931 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 932 PetscErrorCode ierr; 933 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 934 int fd; 935 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 936 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 937 PetscScalar *column_values; 938 939 PetscFunctionBegin; 940 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 941 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 942 nz = A->nz + B->nz; 943 if (!rank) { 944 header[0] = MAT_FILE_COOKIE; 945 header[1] = mat->rmap->N; 946 header[2] = mat->cmap->N; 947 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 948 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 949 ierr = 
PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 950 /* get largest number of rows any processor has */ 951 rlen = mat->rmap->n; 952 range = mat->rmap->range; 953 for (i=1; i<size; i++) { 954 rlen = PetscMax(rlen,range[i+1] - range[i]); 955 } 956 } else { 957 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 958 rlen = mat->rmap->n; 959 } 960 961 /* load up the local row counts */ 962 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 963 for (i=0; i<mat->rmap->n; i++) { 964 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 965 } 966 967 /* store the row lengths to the file */ 968 if (!rank) { 969 MPI_Status status; 970 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 971 for (i=1; i<size; i++) { 972 rlen = range[i+1] - range[i]; 973 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 974 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 975 } 976 } else { 977 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 980 981 /* load up the local column indices */ 982 nzmax = nz; /* )th processor needs space a largest processor needs */ 983 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 984 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 985 cnt = 0; 986 for (i=0; i<mat->rmap->n; i++) { 987 for (j=B->i[i]; j<B->i[i+1]; j++) { 988 if ( (col = garray[B->j[j]]) > cstart) break; 989 column_indices[cnt++] = col; 990 } 991 for (k=A->i[i]; k<A->i[i+1]; k++) { 992 column_indices[cnt++] = A->j[k] + cstart; 993 } 994 for (; j<B->i[i+1]; j++) { 995 column_indices[cnt++] = garray[B->j[j]]; 996 } 997 } 998 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 999 
1000 /* store the column indices to the file */ 1001 if (!rank) { 1002 MPI_Status status; 1003 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1004 for (i=1; i<size; i++) { 1005 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1006 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1007 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1008 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1009 } 1010 } else { 1011 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1012 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1013 } 1014 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1015 1016 /* load up the local column values */ 1017 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1018 cnt = 0; 1019 for (i=0; i<mat->rmap->n; i++) { 1020 for (j=B->i[i]; j<B->i[i+1]; j++) { 1021 if ( garray[B->j[j]] > cstart) break; 1022 column_values[cnt++] = B->a[j]; 1023 } 1024 for (k=A->i[i]; k<A->i[i+1]; k++) { 1025 column_values[cnt++] = A->a[k]; 1026 } 1027 for (; j<B->i[i+1]; j++) { 1028 column_values[cnt++] = B->a[j]; 1029 } 1030 } 1031 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1032 1033 /* store the column values to the file */ 1034 if (!rank) { 1035 MPI_Status status; 1036 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1037 for (i=1; i<size; i++) { 1038 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1039 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1040 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1041 ierr = 
PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1042 } 1043 } else { 1044 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1045 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1046 } 1047 ierr = PetscFree(column_values);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1054 { 1055 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1056 PetscErrorCode ierr; 1057 PetscMPIInt rank = aij->rank,size = aij->size; 1058 PetscTruth isdraw,iascii,isbinary; 1059 PetscViewer sviewer; 1060 PetscViewerFormat format; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1066 if (iascii) { 1067 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1068 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1069 MatInfo info; 1070 PetscTruth inodes; 1071 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1074 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1075 if (!inodes) { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } else { 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1080 
rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  /* full matrix output: handled locally on one process, otherwise gathered below */
  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    /* guard against accidentally dumping a huge matrix in ASCII */
    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    /* temporary parallel matrix with all rows owned by rank 0 */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part: column indices are temporarily shifted to global numbering */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* restore the local numbering mutated above */
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part: garray maps compressed local columns to global columns */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* dispatch to the combined viewer routine for the supported viewer types */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode
MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1211 { 1212 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1213 PetscErrorCode ierr; 1214 Vec bb1 = 0; 1215 PetscTruth hasop; 1216 1217 PetscFunctionBegin; 1218 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1219 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1220 } 1221 1222 if (flag == SOR_APPLY_UPPER) { 1223 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1228 if (flag & SOR_ZERO_INITIAL_GUESS) { 1229 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1230 its--; 1231 } 1232 1233 while (its--) { 1234 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 1237 /* update rhs: bb1 = bb - B*x */ 1238 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1239 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1240 1241 /* local sweep */ 1242 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1243 } 1244 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1245 if (flag & SOR_ZERO_INITIAL_GUESS) { 1246 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1247 its--; 1248 } 1249 while (its--) { 1250 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 1253 /* update rhs: bb1 = bb - B*x */ 1254 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1255 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1256 1257 /* local sweep */ 1258 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1259 } 1260 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1261 if (flag & SOR_ZERO_INITIAL_GUESS) { 1262 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1263 its--; 1264 } 1265 while (its--) { 1266 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 1269 /* update rhs: bb1 = bb - B*x */ 1270 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1271 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1272 1273 /* local sweep */ 1274 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1275 } 1276 } else if (flag & SOR_EISENSTAT) { 1277 Vec xx1; 1278 1279 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1280 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1281 1282 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 if (!mat->diag) { 1285 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1286 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1287 } 1288 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1289 if (hasop) { 1290 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1291 } else { 1292 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1293 } 1294 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1295 1296 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1297 1298 /* local sweep */ 1299 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1300 ierr = 
VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1301 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1302 } else { 1303 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1304 } 1305 1306 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatPermute_MPIAIJ" 1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1313 { 1314 MPI_Comm comm,pcomm; 1315 PetscInt first,local_size,nrows; 1316 const PetscInt *rows; 1317 PetscMPIInt size; 1318 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1323 /* make a collective version of 'rowp' */ 1324 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1325 if (pcomm==comm) { 1326 crowp = rowp; 1327 } else { 1328 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1329 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1330 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1331 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1332 } 1333 /* collect the global row permutation and invert it */ 1334 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1335 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1336 if (pcomm!=comm) { 1337 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1338 } 1339 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1340 /* get the local target indices */ 1341 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1342 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1344 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1345 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1346 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1347 /* the column permutation is so much easier; 1348 make a local version of 'colp' and invert it */ 1349 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1350 
ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/* sums the info of the diagonal (A) and off-diagonal (B) blocks; for global
   flavors the five counters are reduced (MAX or SUM) over the communicator */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/* forwards each option to the sub-blocks; symmetry-related options go only to A */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns (a copy of) one locally-owned row with globally-numbered, increasing
   column indices, merging entries of the diagonal (A) and off-diagonal (B)
   blocks. Must be paired with MatRestoreRow_MPIAIJ().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request only the pieces (values and/or indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if
(cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then the diagonal block, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already found by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* releases the row obtained with MatGetRow_MPIAIJ(); only clears the active flag */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/* Frobenius, one (max column sum) and infinity (max row sum) norms; each is
   computed from the local A and B blocks and combined with an Allreduce */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp accumulates per-(global)column absolute sums, reduced over all ranks */
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/* forms the transpose of A by inserting each local entry at its transposed
   (global) position in a new/reused matrix B and assembling */
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: insert column-wise so entries land transposed */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr =
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1686 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1687 array = Bloc->a; 1688 row = A->rmap->rstart; 1689 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1690 cols_tmp = cols; 1691 for (i=0; i<mb; i++) { 1692 ncol = bi[i+1]-bi[i]; 1693 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1694 row++; array += ncol; cols_tmp += ncol; 1695 } 1696 ierr = PetscFree(cols);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1701 *matout = B; 1702 } else { 1703 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1713 Mat a = aij->A,b = aij->B; 1714 PetscErrorCode ierr; 1715 PetscInt s1,s2,s3; 1716 1717 PetscFunctionBegin; 1718 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1719 if (rr) { 1720 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1721 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1722 /* Overlap communication with computation. 
*/ 1723 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 } 1725 if (ll) { 1726 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1727 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1728 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1729 } 1730 /* scale the diagonal block */ 1731 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1732 1733 if (rr) { 1734 /* Do a scatter end and then right scale the off-diagonal block */ 1735 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1737 } 1738 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1751 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1752 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1759 { 1760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1761 PetscErrorCode ierr; 1762 1763 PetscFunctionBegin; 1764 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatEqual_MPIAIJ" 1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1771 { 1772 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1773 Mat a,b,c,d; 1774 PetscTruth flg; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 a = matA->A; b = matA->B; 1779 c = matB->A; d = matB->B; 1780 1781 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1782 if (flg) { 1783 ierr 
= MatEqual(b,d,&flg);CHKERRQ(ierr); 1784 } 1785 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatCopy_MPIAIJ" 1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1792 { 1793 PetscErrorCode ierr; 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1795 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 /* because of the column compression in the off-processor part of the matrix a->B, 1801 the number of columns in a->B and b->B may be different, hence we cannot call 1802 the MatCopy() directly on the two parts. If need be, we can provide a more 1803 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1804 then copying the submatrices */ 1805 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1808 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1816 { 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #include "petscblaslapack.h" 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 
1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include <boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map->hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new 
lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1967 ierr = PetscContainerSetPointer(c, lgraph_p); 1968 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, 
&array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = 
PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt 
tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; 
i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for (l=0; l<nzB; l++) { 2227 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + 
nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = 
MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 /*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* 
create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == 
MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 
2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->cmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 
PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, 
 offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  /* NOTE(review): the winner is chosen by absolute value and idx[] is
     dereferenced without the NULL guard the *Abs variants use -- verify. */
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];      /* diagonal block: offset local index */
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];      /* off-diagonal: map through garray */
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
/* Returns a sequential matrix holding the nonzero structure (no values) of the
   whole parallel matrix; thin wrapper over MatGetSubMatrix_MPIAIJ_All(). */
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  /* free only the array wrapper; the matrix itself is handed to the caller */
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
/* -------------------------------------------------------------------*/
/* Operation table for the MPIAIJ matrix type; the numbered markers give the
   (0-based) slot index of the entry that follows, 0 means "not supported". */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/
       /* continuation of the MPIAIJ operation table (MatOps_Values) */
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
       0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Stashes a copy of the current numerical values of both sequential blocks
   (diagonal aij->A and off-diagonal aij->B) for later MatRetrieveValues(). */
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously stashed by MatStoreValues_MPIAIJ(). */
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/* Type-specific preallocation: validates the requested per-row counts,
   sets up the layouts and creates/preallocates the two sequential blocks. */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  /* defaults when the caller gave no estimate */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if
 (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    /* off-diagonal block is created with the full global column width N */
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/* Creates a new MPIAIJ matrix with the same layout as matin, duplicating the
   two sequential blocks, the scatter context and the bookkeeping arrays;
   cpvalues controls whether numerical values are copied. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factor    = matin->factor;
  mat->rmap->bs  = matin->rmap->bs;
  mat->assembled = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch state starts out empty in the copy */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  /* duplicate the global-to-local column map (table or dense array form) */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* duplicate the local-to-global column map of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix from a PETSc binary viewer.  Process 0 reads
   the header, row lengths, column indices and values from the file and ships
   each process its slice; every process then preallocates and assembles its
   local part.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    /* NOTE(review): this scan stops before rowners[size], so the last
       process's count is never examined.  With the distribution above rank 0
       always owns the most rows, so mmax >= m still holds -- confirm this
       bound is all that is required. */
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* turn the gathered counts into a prefix-sum ownership table */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr =
 PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    /* read each other process's row lengths, total them, and forward them */
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
      ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    /* NOTE(review): CHKERRQ(ierr) after PetscMPIIntCast() re-checks the
       PREVIOUS ierr value; the cast does not set ierr.  Harmless but
       misleading. */
    mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
    ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    mpicnt =
 PetscMPIIntCast(nz);CHKERRQ(ierr);
    ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity-check that exactly as many indices arrived as expected */
    ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];     /* temporarily make ourlens the DIAGONAL counts */
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];     /* restore the full row lengths */
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr =
 MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    mpicnt = PetscMPIIntCast(nz);
    ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    /* sanity-check that exactly as many values arrived as expected */
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/* Extracts the parallel submatrix defined by isrow/iscol.  The column index
   set is gathered onto every process (ISAllGather) and cached on the new
   matrix so a MAT_REUSE_MATRIX call can find it again. */
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* the compose takes a reference, so the local destroy leaves it cached */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix cached on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart =
 rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;    /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
3197 */ 3198 M->was_assembled = PETSC_TRUE; 3199 M->assembled = PETSC_FALSE; 3200 } 3201 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3202 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3203 ii = aij->i; 3204 jj = aij->j; 3205 aa = aij->a; 3206 for (i=0; i<m; i++) { 3207 row = rstart + i; 3208 nz = ii[i+1] - ii[i]; 3209 cwork = jj; jj += nz; 3210 vwork = aa; aa += nz; 3211 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3212 } 3213 3214 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3215 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 *newmat = M; 3217 3218 /* save submatrix used in processor for next request */ 3219 if (call == MAT_INITIAL_MATRIX) { 3220 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3221 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3222 } 3223 3224 PetscFunctionReturn(0); 3225 } 3226 3227 EXTERN_C_BEGIN 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3230 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3231 { 3232 PetscInt m,cstart, cend,j,nnz,i,d; 3233 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3234 const PetscInt *JJ; 3235 PetscScalar *values; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3240 3241 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3243 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3244 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3245 m = B->rmap->n; 3246 cstart = B->cmap->rstart; 3247 cend = B->cmap->rend; 3248 rstart = B->rmap->rstart; 3249 3250 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3251 3252 #if defined(PETSC_USE_DEBUGGING) 3253 for (i=0; i<m; i++) { 3254 nnz = Ii[i+1]- 
Ii[i]; 3255 JJ = J + Ii[i]; 3256 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3257 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3258 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3259 } 3260 #endif 3261 3262 for (i=0; i<m; i++) { 3263 nnz = Ii[i+1]- Ii[i]; 3264 JJ = J + Ii[i]; 3265 nnz_max = PetscMax(nnz_max,nnz); 3266 d = 0; 3267 for (j=0; j<nnz; j++) { 3268 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3269 } 3270 d_nnz[i] = d; 3271 o_nnz[i] = nnz - d; 3272 } 3273 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3274 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3275 3276 if (v) values = (PetscScalar*)v; 3277 else { 3278 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3279 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3280 } 3281 3282 for (i=0; i<m; i++) { 3283 ii = i + rstart; 3284 nnz = Ii[i+1]- Ii[i]; 3285 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3286 } 3287 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3288 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 3290 if (!v) { 3291 ierr = PetscFree(values);CHKERRQ(ierr); 3292 } 3293 PetscFunctionReturn(0); 3294 } 3295 EXTERN_C_END 3296 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3299 /*@ 3300 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3301 (the default parallel PETSc format). 3302 3303 Collective on MPI_Comm 3304 3305 Input Parameters: 3306 + B - the matrix 3307 . i - the indices into j for the start of each local row (starts with zero) 3308 . 
  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
/* Public entry point: dispatches to the type-specific implementation
   registered on B (a no-op when the type does not provide one). */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3360 (the default parallel PETSc format). For good matrix assembly performance 3361 the user should preallocate the matrix storage by setting the parameters 3362 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3363 performance can be increased by more than a factor of 50. 3364 3365 Collective on MPI_Comm 3366 3367 Input Parameters: 3368 + A - the matrix 3369 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3370 (same value is used for all local rows) 3371 . d_nnz - array containing the number of nonzeros in the various rows of the 3372 DIAGONAL portion of the local submatrix (possibly different for each row) 3373 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3374 The size of this array is equal to the number of local rows, i.e 'm'. 3375 You must leave room for the diagonal entry even if it is zero. 3376 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3377 submatrix (same value is used for all local rows). 3378 - o_nnz - array containing the number of nonzeros in the various rows of the 3379 OFF-DIAGONAL portion of the local submatrix (possibly different for 3380 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3381 structure. The size of this array is equal to the number 3382 of local rows, i.e 'm'. 3383 3384 If the *_nnz parameter is given then the *_nz parameter is ignored 3385 3386 The AIJ format (also called the Yale sparse matrix format or 3387 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3388 storage. The stored row and column indices begin with zero. See the users manual for details. 3389 3390 The parallel matrix is partitioned such that the first m0 rows belong to 3391 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3392 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3393 3394 The DIAGONAL portion of the local submatrix of a processor can be defined 3395 as the submatrix which is obtained by extraction the part corresponding 3396 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3397 first row that belongs to the processor, and r2 is the last row belonging 3398 to the this processor. This is a square mxm matrix. The remaining portion 3399 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3400 3401 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3402 3403 You can call MatGetInfo() to get information on how effective the preallocation was; 3404 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3405 You can also run with the option -info and look for messages with the string 3406 malloc in them to see if additional memory allocation was needed. 3407 3408 Example usage: 3409 3410 Consider the following 8x8 matrix with 34 non-zero values, that is 3411 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3412 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3413 as follows: 3414 3415 .vb 3416 1 2 0 | 0 3 0 | 0 4 3417 Proc0 0 5 6 | 7 0 0 | 8 0 3418 9 0 10 | 11 0 0 | 12 0 3419 ------------------------------------- 3420 13 0 14 | 15 16 17 | 0 0 3421 Proc1 0 18 0 | 19 20 21 | 0 0 3422 0 0 0 | 22 23 0 | 24 0 3423 ------------------------------------- 3424 Proc2 25 26 27 | 0 0 28 | 29 0 3425 30 0 0 | 31 32 33 | 0 34 3426 .ve 3427 3428 This can be represented as a collection of submatrices as: 3429 3430 .vb 3431 A B C 3432 D E F 3433 G H I 3434 .ve 3435 3436 Where the submatrices A,B,C are owned by proc0, D,E,F are 3437 owned by proc1, G,H,I are owned by proc2. 3438 3439 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3440 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3441 The 'M','N' parameters are 8,8, and have the same values on all procs. 

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  /* Look up the type-specific implementation registered on B under the
     name "MatMPIAIJSetPreallocation_C"; if the matrix type has not
     registered one, this routine is silently a no-op (no error), so it
     may be called on matrices of any type. */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* i[] must be a valid 0-based CSR row-offset array for the local rows */
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  /* unlike most creation routines, m may not be left to PETSc to decide */
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR triplet is copied into the matrix; caller retains ownership of i,j,a */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 3575 performance can be increased by more than a factor of 50. 3576 3577 Collective on MPI_Comm 3578 3579 Input Parameters: 3580 + comm - MPI communicator 3581 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3582 This value should be the same as the local size used in creating the 3583 y vector for the matrix-vector product y = Ax. 3584 . n - This value should be the same as the local size used in creating the 3585 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3586 calculated if N is given) For square matrices n is almost always m. 3587 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3588 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3589 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3590 (same value is used for all local rows) 3591 . d_nnz - array containing the number of nonzeros in the various rows of the 3592 DIAGONAL portion of the local submatrix (possibly different for each row) 3593 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3594 The size of this array is equal to the number of local rows, i.e 'm'. 3595 You must leave room for the diagonal entry even if it is zero. 3596 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3597 submatrix (same value is used for all local rows). 3598 - o_nnz - array containing the number of nonzeros in the various rows of the 3599 OFF-DIAGONAL portion of the local submatrix (possibly different for 3600 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3601 structure. The size of this array is equal to the number 3602 of local rows, i.e 'm'. 3603 3604 Output Parameter: 3605 . A - the matrix 3606 3607 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3608 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
3609 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3610 3611 Notes: 3612 If the *_nnz parameter is given then the *_nz parameter is ignored 3613 3614 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3615 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3616 storage requirements for this matrix. 3617 3618 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3619 processor than it must be used on all processors that share the object for 3620 that argument. 3621 3622 The user MUST specify either the local or global matrix dimensions 3623 (possibly both). 3624 3625 The parallel matrix is partitioned across processors such that the 3626 first m0 rows belong to process 0, the next m1 rows belong to 3627 process 1, the next m2 rows belong to process 2 etc.. where 3628 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3629 values corresponding to [m x N] submatrix. 3630 3631 The columns are logically partitioned with the n0 columns belonging 3632 to 0th partition, the next n1 columns belonging to the next 3633 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3634 3635 The DIAGONAL portion of the local submatrix on any given processor 3636 is the submatrix corresponding to the rows and columns m,n 3637 corresponding to the given processor. i.e diagonal matrix on 3638 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3639 etc. The remaining portion of the local submatrix [m x (N-n)] 3640 constitute the OFF-DIAGONAL portion. The example below better 3641 illustrates this concept. 3642 3643 For a square global matrix we define each processor's diagonal portion 3644 to be its local rows and the corresponding columns (a square submatrix); 3645 each processor's off-diagonal portion encompasses the remainder of the 3646 local matrix (a rectangular submatrix). 
3647 3648 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3649 3650 When calling this routine with a single process communicator, a matrix of 3651 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3652 type of communicator, use the construction mechanism: 3653 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3654 3655 By default, this format uses inodes (identical nodes) when possible. 3656 We search for consecutive rows with the same nonzero structure, thereby 3657 reusing matrix information to achieve increased efficiency. 3658 3659 Options Database Keys: 3660 + -mat_no_inode - Do not use inodes 3661 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3662 - -mat_aij_oneindex - Internally use indexing starting at 1 3663 rather than 0. Note that when calling MatSetValues(), 3664 the user still MUST index entries starting at 0! 3665 3666 3667 Example usage: 3668 3669 Consider the following 8x8 matrix with 34 non-zero values, that is 3670 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3671 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3672 as follows: 3673 3674 .vb 3675 1 2 0 | 0 3 0 | 0 4 3676 Proc0 0 5 6 | 7 0 0 | 8 0 3677 9 0 10 | 11 0 0 | 12 0 3678 ------------------------------------- 3679 13 0 14 | 15 16 17 | 0 0 3680 Proc1 0 18 0 | 19 20 21 | 0 0 3681 0 0 0 | 22 23 0 | 24 0 3682 ------------------------------------- 3683 Proc2 25 26 27 | 0 0 28 | 29 0 3684 30 0 0 | 31 32 33 | 0 34 3685 .ve 3686 3687 This can be represented as a collection of submatrices as: 3688 3689 .vb 3690 A B C 3691 D E F 3692 G H I 3693 .ve 3694 3695 Where the submatrices A,B,C are owned by proc0, D,E,F are 3696 owned by proc1, G,H,I are owned by proc2. 3697 3698 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3699 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* on a one-process communicator a plain SeqAIJ matrix is returned instead;
       the off-diagonal preallocation arguments are then irrelevant */
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/* Returns (without copying) the diagonal-block matrix, the off-diagonal-block
   matrix, and the local-to-global column map (garray) of an MPIAIJ matrix.
   The caller does not own the returned objects. */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* Applies an ISColoring to an MPIAIJ matrix by splitting it into a coloring
   for the diagonal (SeqAIJ) block and one for the off-diagonal block.
   Supports IS_COLORING_GLOBAL (colors indexed by global column) and
   IS_COLORING_GHOSTED (colors indexed by local/ghosted numbering); any
   other type raises an error. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the full global color
       array, then select the entries for this process's ghost columns
       (a->garray maps local off-diagonal columns to global columns) */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map the owned global columns
       through A->mapping into the ghosted local numbering used by coloring */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: likewise map the ghost
       columns (a->garray) into the ghosted local numbering */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-differentiated values to the diagonal and off-diagonal
   SeqAIJ blocks of an MPIAIJ matrix. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forwards ADIFOR-differentiated values (nl colors) to the diagonal and
   off-diagonal SeqAIJ blocks of an MPIAIJ matrix. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
3887 3888 @*/ 3889 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3890 { 3891 PetscErrorCode ierr; 3892 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3893 PetscInt *indx; 3894 PetscScalar *values; 3895 3896 PetscFunctionBegin; 3897 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3898 if (scall == MAT_INITIAL_MATRIX){ 3899 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3900 if (n == PETSC_DECIDE){ 3901 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3902 } 3903 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3904 rstart -= m; 3905 3906 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3907 for (i=0;i<m;i++) { 3908 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3909 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3910 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3911 } 3912 /* This routine will ONLY return MPIAIJ type matrix */ 3913 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3914 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3915 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3916 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3917 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3918 3919 } else if (scall == MAT_REUSE_MATRIX){ 3920 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3921 } else { 3922 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3923 } 3924 3925 for (i=0;i<m;i++) { 3926 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3927 Ii = i + rstart; 3928 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3929 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3930 } 3931 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3932 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 ierr = 
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3934 3935 PetscFunctionReturn(0); 3936 } 3937 3938 #undef __FUNCT__ 3939 #define __FUNCT__ "MatFileSplit" 3940 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3941 { 3942 PetscErrorCode ierr; 3943 PetscMPIInt rank; 3944 PetscInt m,N,i,rstart,nnz; 3945 size_t len; 3946 const PetscInt *indx; 3947 PetscViewer out; 3948 char *name; 3949 Mat B; 3950 const PetscScalar *values; 3951 3952 PetscFunctionBegin; 3953 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3954 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3955 /* Should this be the type of the diagonal block of A? */ 3956 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3957 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3958 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3959 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3960 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3961 for (i=0;i<m;i++) { 3962 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3963 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3964 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3965 } 3966 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3967 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3968 3969 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3970 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3971 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3972 sprintf(name,"%s.%d",outfile,rank); 3973 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3974 ierr = PetscFree(name); 3975 ierr = MatView(B,out);CHKERRQ(ierr); 3976 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3977 ierr = MatDestroy(B);CHKERRQ(ierr); 3978 PetscFunctionReturn(0); 3979 } 3980 3981 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3982 #undef __FUNCT__ 3983 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3984 PetscErrorCode PETSCMAT_DLLEXPORT 
MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3985 { 3986 PetscErrorCode ierr; 3987 Mat_Merge_SeqsToMPI *merge; 3988 PetscContainer container; 3989 3990 PetscFunctionBegin; 3991 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3992 if (container) { 3993 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4004 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4005 4006 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4007 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4008 } 4009 ierr = PetscFree(merge);CHKERRQ(ierr); 4010 4011 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4012 PetscFunctionReturn(0); 4013 } 4014 4015 #include "../src/mat/utils/freespace.h" 4016 #include "petscbt.h" 4017 4018 #undef __FUNCT__ 4019 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4020 /*@C 4021 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4022 matrices from each processor 4023 4024 Collective on MPI_Comm 4025 4026 Input Parameters: 4027 + comm - the communicators the parallel matrix will live on 4028 . seqmat - the input sequential matrices 4029 . m - number of local rows (or PETSC_DECIDE) 4030 . n - number of local columns (or PETSC_DECIDE) 4031 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4032 4033 Output Parameter: 4034 . 
  mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  }
  /* NOTE(review): merge is assigned only when the container exists (attached
     by the symbolic phase); if the container is absent, merge is read
     uninitialized below -- callers must run MatMerge_SeqsToMPISymbolic()
     on mpimat first. TODO confirm and consider an explicit error here. */
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* the values for all rows owned by [proc] are contiguous in aa, starting
       at the first entry of that process's first row */
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    /* (aj/aa are repointed per-row here; the declarations' initial values
       are no longer used from this point on) */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/* Symbolic phase of the SeqsToMPI merge: determines the parallel nonzero
   structure and communication pattern; continues below. */
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr =
MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4186 4187 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4188 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4189 4190 /* determine row ownership */ 4191 /*---------------------------------------------------------*/ 4192 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4193 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4194 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4195 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4196 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4197 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4198 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4199 4200 m = merge->rowmap->n; 4201 M = merge->rowmap->N; 4202 owners = merge->rowmap->range; 4203 4204 /* determine the number of messages to send, their lengths */ 4205 /*---------------------------------------------------------*/ 4206 len_s = merge->len_s; 4207 4208 len = 0; /* length of buf_si[] */ 4209 merge->nsend = 0; 4210 for (proc=0; proc<size; proc++){ 4211 len_si[proc] = 0; 4212 if (proc == rank){ 4213 len_s[proc] = 0; 4214 } else { 4215 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4216 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4217 } 4218 if (len_s[proc]) { 4219 merge->nsend++; 4220 nrows = 0; 4221 for (i=owners[proc]; i<owners[proc+1]; i++){ 4222 if (ai[i+1] > ai[i]) nrows++; 4223 } 4224 len_si[proc] = 2*(nrows+1); 4225 len += len_si[proc]; 4226 } 4227 } 4228 4229 /* determine the number and length of messages to receive for ij-structure */ 4230 /*-------------------------------------------------------------------------*/ 4231 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4232 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 
4233 4234 /* post the Irecv of j-structure */ 4235 /*-------------------------------*/ 4236 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4237 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4238 4239 /* post the Isend of j-structure */ 4240 /*--------------------------------*/ 4241 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4242 4243 for (proc=0, k=0; proc<size; proc++){ 4244 if (!len_s[proc]) continue; 4245 i = owners[proc]; 4246 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4247 k++; 4248 } 4249 4250 /* receives and sends of j-structure are complete */ 4251 /*------------------------------------------------*/ 4252 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4253 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4254 4255 /* send and recv i-structure */ 4256 /*---------------------------*/ 4257 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4258 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4259 4260 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4261 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4262 for (proc=0,k=0; proc<size; proc++){ 4263 if (!len_s[proc]) continue; 4264 /* form outgoing message for i-structure: 4265 buf_si[0]: nrows to be sent 4266 [1:nrows]: row index (global) 4267 [nrows+1:2*nrows+1]: i-structure index 4268 */ 4269 /*-------------------------------------------*/ 4270 nrows = len_si[proc]/2 - 1; 4271 buf_si_i = buf_si + nrows+1; 4272 buf_si[0] = nrows; 4273 buf_si_i[0] = 0; 4274 nrows = 0; 4275 for (i=owners[proc]; i<owners[proc+1]; i++){ 4276 anzi = ai[i+1] - ai[i]; 4277 if (anzi) { 4278 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4279 buf_si[nrows+1] = i-owners[proc]; /* local 
row index */ 4280 nrows++; 4281 } 4282 } 4283 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4284 k++; 4285 buf_si += len_si[proc]; 4286 } 4287 4288 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4289 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4290 4291 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4292 for (i=0; i<merge->nrecv; i++){ 4293 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4294 } 4295 4296 ierr = PetscFree(len_si);CHKERRQ(ierr); 4297 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4298 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4299 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4300 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4301 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4302 ierr = PetscFree(status);CHKERRQ(ierr); 4303 4304 /* compute a local seq matrix in each processor */ 4305 /*----------------------------------------------*/ 4306 /* allocate bi array and free space for accumulating nonzero column info */ 4307 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4308 bi[0] = 0; 4309 4310 /* create and initialize a linked list */ 4311 nlnk = N+1; 4312 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4313 4314 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4315 len = 0; 4316 len = ai[owners[rank+1]] - ai[owners[rank]]; 4317 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4318 current_space = free_space; 4319 4320 /* determine symbolic info for each local row */ 4321 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4322 4323 for (k=0; k<merge->nrecv; k++){ 4324 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4325 nrows = *buf_ri_k[k]; 4326 nextrow[k] = buf_ri_k[k] + 
1; /* next row number of k-th recved i-structure */ 4327 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4328 } 4329 4330 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4331 len = 0; 4332 for (i=0;i<m;i++) { 4333 bnzi = 0; 4334 /* add local non-zero cols of this proc's seqmat into lnk */ 4335 arow = owners[rank] + i; 4336 anzi = ai[arow+1] - ai[arow]; 4337 aj = a->j + ai[arow]; 4338 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4339 bnzi += nlnk; 4340 /* add received col data into lnk */ 4341 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4342 if (i == *nextrow[k]) { /* i-th row */ 4343 anzi = *(nextai[k]+1) - *nextai[k]; 4344 aj = buf_rj[k] + *nextai[k]; 4345 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4346 bnzi += nlnk; 4347 nextrow[k]++; nextai[k]++; 4348 } 4349 } 4350 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4351 4352 /* if free space is not available, make more free space */ 4353 if (current_space->local_remaining<bnzi) { 4354 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4355 nspacedouble++; 4356 } 4357 /* copy data into free space, then initialize lnk */ 4358 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4359 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4360 4361 current_space->array += bnzi; 4362 current_space->local_used += bnzi; 4363 current_space->local_remaining -= bnzi; 4364 4365 bi[i+1] = bi[i] + bnzi; 4366 } 4367 4368 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4369 4370 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4371 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4373 4374 /* create symbolic parallel matrix B_mpi */ 4375 /*---------------------------------------*/ 4376 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4377 
  /* when local n is PETSC_DECIDE the global column count N from seqmat is
     supplied explicitly; otherwise both globals are derived from (m,n) */
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse by the numeric phase */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  /* release the reference taken by PetscCommDuplicate() at entry */
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*
   MatMerge_SeqsToMPI - driver that merges per-process SeqAIJ matrices into one
   MPIAIJ matrix: runs the symbolic phase only for MAT_INITIAL_MATRIX, then
   always runs the numeric phase (which fills/assembles the values).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
   MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar       *aa=a->a,*ba=b->a,*cam;
  PetscScalar     *ca;
  PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the result has all of A's diagonal-block plus off-diagonal-block entries */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: entries with global column < cstart
         (cmap[] of the B part is sorted, so these come first) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local column indices shifted to global */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining entries, global column > local range */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure is unchanged; only refresh the numerical values in place */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (columns below the local range) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (columns above the local range) */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
   MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ      *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode  ierr;
  PetscInt        i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS              isrowa,iscola;
  Mat             *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default: the structurally nonzero columns, in ascending global order:
       garray entries below the diagonal block, the diagonal block itself,
       then garray entries above it (garray is sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices() expects an existing array of matrices on reuse */
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+  rowb, colb - index sets of rows and columns of B to extract
.  brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ      *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode  ierr;
  PetscInt        *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS              isrowb,iscolb;
  Mat             *bseq;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout process-by-process */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* rows of B to fetch = structurally nonzero columns of local A, in
       ascending global order (garray below local range, local range, garray above) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() expects an existing array of matrices on reuse */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
   of the OFF-DIAGONAL portion of local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.  startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.
   startsj_r - similar to startsj for receives
-  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
+  B_oth - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout process-by-process */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* the off-diagonal columns of A are exactly the rows of B fetched by A's
     matvec scatter, so reuse that scatter's communication pattern here */
  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  /* without the saved offset/buffer arrays a REUSE call cannot proceed */
  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives: the scatter's value buffers are reused to carry row lengths */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    /* sparsity pattern unchanged: reuse saved offsets/buffers, refresh values only */
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4871 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4872 b_oth->free_a = PETSC_TRUE; 4873 b_oth->free_ij = PETSC_TRUE; 4874 b_oth->nonew = 0; 4875 4876 ierr = PetscFree(bufj);CHKERRQ(ierr); 4877 if (!startsj || !bufa_ptr){ 4878 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4879 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4880 } else { 4881 *startsj = sstartsj; 4882 *startsj_r = rstartsj; 4883 *bufa_ptr = bufa; 4884 } 4885 } 4886 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatGetCommunicationStructs" 4892 /*@C 4893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4894 4895 Not Collective 4896 4897 Input Parameters: 4898 . A - The matrix in mpiaij format 4899 4900 Output Parameter: 4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4902 . colmap - A map from global column index to local index into lvec 4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4904 4905 Level: developer 4906 4907 @*/ 4908 #if defined (PETSC_USE_CTABLE) 4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4910 #else 4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4912 #endif 4913 { 4914 Mat_MPIAIJ *a; 4915 4916 PetscFunctionBegin; 4917 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4918 PetscValidPointer(lvec, 2) 4919 PetscValidPointer(colmap, 3) 4920 PetscValidPointer(multScatter, 4) 4921 a = (Mat_MPIAIJ *) A->data; 4922 if (lvec) *lvec = a->lvec; 4923 if (colmap) *colmap = a->colmap; 4924 if (multScatter) *multScatter = a->Mvctx; 4925 PetscFunctionReturn(0); 4926 } 4927 4928 EXTERN_C_BEGIN 4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const 
MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (           )         (                  )
      m (      A       )  *  n (     B     )   =   m (         C        )
        (              )       (           )         (                  )

    Note: both explicit transposes are rebuilt on every call; acceptable for a
    numeric fallback path, but not cheap.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  /* transpose the product back into the caller-provided C (values reused in place) */
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/*
   Symbolic phase: C = A*B is dense, so no sparsity analysis is needed --
   just create an assembled, zero MPIDENSE matrix of the product shape.
*/
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* driver: symbolic phase only on MAT_INITIAL_MATRIX, numeric phase always */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - type constructor registered for MATMPIAIJ: installs the
   function table, initializes the per-process data structure, and composes
   the type-specific operations (preallocation, conversions, external solver
   factorizations) on the object.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register external direct-solver factorizations available in this build */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* type-specific operations looked up by name elsewhere in the library */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* both CSR row arrays must be zero-based and start at 0 */
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  /* the caller's arrays are used in place, so off-process stashing is disabled
     and the matrix is marked preallocated without a preallocation call */
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the user arrays (no copy) as the sequential "diagonal" (A) and
     "off-diagonal" (B) blocks; B is created with the full global column width */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
/*
   matsetvaluesmpiaij_ - Fortran-callable fast path equivalent of
   MatSetValues() for MPIAIJ matrices; inserts/adds an m-by-n logically
   dense block of values.  All scalar arguments arrive by reference
   (Fortran calling convention).  Because the function returns void,
   errors abort via the CHKERRABORT-based macros redefined above.
*/
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  /* first call fixes the insert mode; mixing ADD and INSERT is an error */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend =
mat->cmap->rend,row,col; 5248 PetscTruth roworiented = aij->roworiented; 5249 5250 /* Some Variables required in the macro */ 5251 Mat A = aij->A; 5252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5253 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5254 MatScalar *aa = a->a; 5255 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5256 Mat B = aij->B; 5257 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5258 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5259 MatScalar *ba = b->a; 5260 5261 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5262 PetscInt nonew = a->nonew; 5263 MatScalar *ap1,*ap2; 5264 5265 PetscFunctionBegin; 5266 for (i=0; i<m; i++) { 5267 if (im[i] < 0) continue; 5268 #if defined(PETSC_USE_DEBUG) 5269 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5270 #endif 5271 if (im[i] >= rstart && im[i] < rend) { 5272 row = im[i] - rstart; 5273 lastcol1 = -1; 5274 rp1 = aj + ai[row]; 5275 ap1 = aa + ai[row]; 5276 rmax1 = aimax[row]; 5277 nrow1 = ailen[row]; 5278 low1 = 0; 5279 high1 = nrow1; 5280 lastcol2 = -1; 5281 rp2 = bj + bi[row]; 5282 ap2 = ba + bi[row]; 5283 rmax2 = bimax[row]; 5284 nrow2 = bilen[row]; 5285 low2 = 0; 5286 high2 = nrow2; 5287 5288 for (j=0; j<n; j++) { 5289 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5290 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5291 if (in[j] >= cstart && in[j] < cend){ 5292 col = in[j] - cstart; 5293 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5294 } else if (in[j] < 0) continue; 5295 #if defined(PETSC_USE_DEBUG) 5296 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5297 #endif 5298 else { 5299 if (mat->was_assembled) { 5300 if (!aij->colmap) { 5301 ierr = 
CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5302 } 5303 #if defined (PETSC_USE_CTABLE) 5304 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5305 col--; 5306 #else 5307 col = aij->colmap[in[j]] - 1; 5308 #endif 5309 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5310 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5311 col = in[j]; 5312 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5313 B = aij->B; 5314 b = (Mat_SeqAIJ*)B->data; 5315 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5316 rp2 = bj + bi[row]; 5317 ap2 = ba + bi[row]; 5318 rmax2 = bimax[row]; 5319 nrow2 = bilen[row]; 5320 low2 = 0; 5321 high2 = nrow2; 5322 bm = aij->B->rmap->n; 5323 ba = b->a; 5324 } 5325 } else col = in[j]; 5326 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5327 } 5328 } 5329 } else { 5330 if (!aij->donotstash) { 5331 if (roworiented) { 5332 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5333 } else { 5334 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5335 } 5336 } 5337 } 5338 }} 5339 PetscFunctionReturnVoid(); 5340 } 5341 EXTERN_C_END 5342 5343