1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr 
= PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of 
   the local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
   at a slightly higher hash table cost; without it, it is not scalable (each process
   has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
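  /* v[] is expected to hold the values for the entire local row 'row', ordered by
     increasing global column index: first the off-diagonal entries to the left of the
     diagonal block, then the diagonal-block entries, then the remaining off-diagonal
     entries.  The three PetscMemcpy() calls below rely on that layout. */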
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 if (v) PetscValidScalarPointer(v,6); 331 for (i=0; i<m; i++) { 332 if (im[i] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 335 #endif 336 if (im[i] >= rstart && im[i] < rend) { 337 row = im[i] - rstart; 338 lastcol1 = -1; 339 rp1 = aj + ai[row]; 340 ap1 = aa + ai[row]; 341 rmax1 = aimax[row]; 342 nrow1 = ailen[row]; 343 low1 = 0; 344 high1 = nrow1; 345 lastcol2 = -1; 346 rp2 = bj + bi[row]; 347 ap2 = ba + bi[row]; 348 rmax2 = bimax[row]; 349 nrow2 = bilen[row]; 350 low2 = 0; 351 high2 = nrow2; 352 353 for (j=0; j<n; j++) { 354 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 355 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 356 if (in[j] >= cstart && in[j] < cend){ 357 col = in[j] - cstart; 358 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 359 } else if (in[j] < 0) continue; 360 #if defined(PETSC_USE_DEBUG) 361 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 362 #endif 363 else { 364 if (mat->was_assembled) { 365 if (!aij->colmap) { 366 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 367 } 368 #if defined (PETSC_USE_CTABLE) 369 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 370 col--; 371 #else 372 col = aij->colmap[in[j]] - 1; 373 #endif 374 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 375 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 376 col = in[j]; 377 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 378 B = aij->B; 379 b = (Mat_SeqAIJ*)B->data; 380 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 381 rp2 = bj + bi[row]; 382 ap2 = ba + bi[row]; 383 rmax2 = bimax[row]; 384 nrow2 = bilen[row]; 385 low2 = 0; 386 high2 = nrow2; 387 bm = aij->B->rmap->n; 388 ba = b->a; 389 } 390 } else col = in[j]; 391 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 392 } 393 } 394 } else { 395 if (!aij->donotstash) { 396 if (roworiented) { 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } else { 399 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 400 } 401 } 402 } 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatGetValues_MPIAIJ" 409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 414 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 415 416 PetscFunctionBegin; 417 for (i=0; i<m; i++) { 418 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 419 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 420 if (idxm[i] >= rstart && idxm[i] < rend) { 421 row = idxm[i] - rstart; 422 for (j=0; j<n; j++) { 423 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 424 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 425 if (idxn[j] >= cstart && idxn[j] < cend){ 426 col = idxn[j] - cstart; 427 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 428 } else { 429 if (!aij->colmap) { 430 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 431 } 432 #if defined (PETSC_USE_CTABLE) 433 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 434 col --; 435 #else 436 col = aij->colmap[idxn[j]] - 1; 437 #endif 438 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 439 else { 440 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 441 } 442 } 443 } 444 } else { 445 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 446 } 447 } 448 PetscFunctionReturn(0); 449 } 450 451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 456 { 457 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 458 PetscErrorCode ierr; 459 PetscInt nstash,reallocs; 460 InsertMode addv; 461 462 PetscFunctionBegin; 463 if (aij->donotstash) { 464 PetscFunctionReturn(0); 465 } 466 467 /* make sure all processors are either in INSERTMODE or ADDMODE */ 468 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 469 if (addv == (ADD_VALUES|INSERT_VALUES)) { 470 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 471 } 472 mat->insertmode = addv; /* in case this processor had no cache */ 473 474 ierr = 
MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt
rows[],PetscScalar diag) 565 { 566 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 567 PetscErrorCode ierr; 568 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 569 PetscInt i,*owners = A->rmap->range; 570 PetscInt *nprocs,j,idx,nsends,row; 571 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 572 PetscInt *rvalues,count,base,slen,*source; 573 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 574 MPI_Comm comm = ((PetscObject)A)->comm; 575 MPI_Request *send_waits,*recv_waits; 576 MPI_Status recv_status,*send_status; 577 #if defined(PETSC_DEBUG) 578 PetscTruth found = PETSC_FALSE; 579 #endif 580 581 PetscFunctionBegin; 582 /* first count number of contributors to each processor */ 583 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 584 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 585 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 586 j = 0; 587 for (i=0; i<N; i++) { 588 if (lastidx > (idx = rows[i])) j = 0; 589 lastidx = idx; 590 for (; j<size; j++) { 591 if (idx >= owners[j] && idx < owners[j+1]) { 592 nprocs[2*j]++; 593 nprocs[2*j+1] = 1; 594 owner[i] = j; 595 #if defined(PETSC_DEBUG) 596 found = PETSC_TRUE; 597 #endif 598 break; 599 } 600 } 601 #if defined(PETSC_DEBUG) 602 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 603 found = PETSC_FALSE; 604 #endif 605 } 606 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 607 608 /* inform other processors of number of messages and max length*/ 609 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 610 611 /* post receives: */ 612 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 613 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 614 for (i=0; i<nrecvs; i++) { 615 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 616 } 617 618 /* do sends: 619 1) starts[i] gives the starting index in svalues for stuff going to 620 the ith processor 621 */ 622 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 623 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 624 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 625 starts[0] = 0; 626 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 627 for (i=0; i<N; i++) { 628 svalues[starts[owner[i]]++] = rows[i]; 629 } 630 631 starts[0] = 0; 632 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 633 count = 0; 634 for (i=0; i<size; i++) { 635 if (nprocs[2*i+1]) { 636 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 637 } 638 } 639 ierr = PetscFree(starts);CHKERRQ(ierr); 640 641 base = owners[rank]; 642 643 /* wait on receives */ 644 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 645 count = nrecvs; slen = 0; 646 while (count) { 647 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 648 /* unpack receives into our local space */ 649 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 650 source[imdex] = recv_status.MPI_SOURCE; 651 lens[imdex] = n; 652 slen += n; 653 count--; 654 } 655 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 656 657 /* move the data into the send scatter */ 658 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 659 count = 0; 660 for (i=0; i<nrecvs; i++) { 661 values = rvalues + i*nmax; 662 for (j=0; j<lens[i]; j++) 
{ 663 lrows[count++] = values[j] - base; 664 } 665 } 666 ierr = PetscFree(rvalues);CHKERRQ(ierr); 667 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 668 ierr = PetscFree(owner);CHKERRQ(ierr); 669 ierr = PetscFree(nprocs);CHKERRQ(ierr); 670 671 /* actually zap the local rows */ 672 /* 673 Zero the required rows. If the "diagonal block" of the matrix 674 is square and the user wishes to set the diagonal we use separate 675 code so that MatSetValues() is not called for each diagonal allocating 676 new memory, thus calling lots of mallocs and slowing things down. 677 678 */ 679 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 680 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 681 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 682 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 683 } else if (diag != 0.0) { 684 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 685 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 686 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 687 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 688 } 689 for (i = 0; i < slen; i++) { 690 row = lrows[i] + rstart; 691 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 692 } 693 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 695 } else { 696 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 697 } 698 ierr = PetscFree(lrows);CHKERRQ(ierr); 699 700 /* wait on sends */ 701 if (nsends) { 702 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 703 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 704 ierr = PetscFree(send_status);CHKERRQ(ierr); 705 } 706 ierr = PetscFree(send_waits);CHKERRQ(ierr); 707 ierr = PetscFree(svalues);CHKERRQ(ierr); 708 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatMult_MPIAIJ" 714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscInt nt; 719 720 PetscFunctionBegin; 721 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 722 if (nt != A->cmap->n) { 723 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 724 } 725 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 727 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 734 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 735 { 736 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatMultAdd_MPIAIJ" 746 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 747 { 748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 753 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 754 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 755 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 761 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 762 { 763 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 764 PetscErrorCode ierr; 765 PetscTruth merged; 766 767 PetscFunctionBegin; 768 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 769 /* do nondiagonal part */ 770 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 771 if (!merged) { 772 /* send it on its way */ 773 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 774 /* do local part */ 775 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 776 /* receive remote parts: note this assumes the values are not actually */ 777 /* added in yy until the next line, */ 778 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 779 } else { 780 /* do local part */ 781 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 782 /* send it on its way */ 783 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 /* values actually were received in the Begin() but we need to call this nop */ 785 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 786 } 787 PetscFunctionReturn(0); 788 } 789 790 EXTERN_C_BEGIN 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 793 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 794 { 795 MPI_Comm comm; 796 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 797 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 798 IS Me,Notme; 799 PetscErrorCode ierr; 800 PetscInt M,N,first,last,*notme,i; 801 PetscMPIInt size; 802 803 PetscFunctionBegin; 804 805 /* Easy test: symmetric diagonal block */ 806 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 807 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 808 if (!*f) PetscFunctionReturn(0); 809 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 810 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 811 if (size == 1) PetscFunctionReturn(0); 812 813 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
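     Each process extracts the block of Amat formed by its owned rows and the columns it
     does not own, and the mirror block of Bmat (those columns against its owned rows),
     then checks that the two sequential blocks are transposes of one another.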
  */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)
{ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 909 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 910 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 911 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 912 ierr = PetscFree(aij);CHKERRQ(ierr); 913 914 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 922 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 926 #undef __FUNCT__ 927 #define __FUNCT__ "MatView_MPIAIJ_Binary" 928 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 929 { 930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 931 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 932 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 933 PetscErrorCode ierr; 934 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 935 int fd; 936 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 937 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 938 PetscScalar *column_values; 939 940 PetscFunctionBegin; 941 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 942 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 943 nz = A->nz + B->nz; 944 if (!rank) { 945 header[0] = MAT_FILE_COOKIE; 946 header[1] = mat->rmap->N; 947 header[2] = mat->cmap->N; 948 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 949 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 950 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 951 /* get largest number of rows any processor has */ 952 rlen = mat->rmap->n; 953 range = mat->rmap->range; 954 for (i=1; i<size; i++) { 955 rlen = PetscMax(rlen,range[i+1] - range[i]); 956 } 957 } else { 958 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 959 rlen = mat->rmap->n; 960 } 961 962 /* load up the local row counts */ 963 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 964 for (i=0; i<mat->rmap->n; i++) { 965 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 966 } 967 968 /* store the row lengths to the file */ 969 if (!rank) { 970 MPI_Status status; 971 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 972 for (i=1; i<size; i++) { 973 rlen = range[i+1] - range[i]; 974 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 975 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 976 } 977 } else { 978 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 979 } 980 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 
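  /* The binary file now holds the header (MAT_FILE_COOKIE, global rows, global columns,
     global nonzero count) followed by the row lengths in global row order; the column
     indices and then the numerical values follow below, all funneled through process 0. */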

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1066 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1067 if (iascii) { 1068 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1069 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1070 MatInfo info; 1071 PetscTruth inodes; 1072 1073 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1074 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1075 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1076 if (!inodes) { 1077 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1078 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1079 } else { 1080 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1081 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1082 } 1083 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1085 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1086 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1087 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1089 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1092 PetscInt inodecount,inodelimit,*inodes; 1093 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1094 if (inodes) { 1095 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1096 } else { 1097 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1098 } 1099 PetscFunctionReturn(0); 1100 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1101 PetscFunctionReturn(0); 1102 } 1103 } else if (isbinary) { 1104 if (size == 1) { 1105 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1106 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1107 } else { 1108 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1109 } 1110 PetscFunctionReturn(0); 1111 } else if (isdraw) { 1112 PetscDraw draw; 1113 PetscTruth isnull; 1114 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1115 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1116 } 1117 1118 if (size == 1) { 1119 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1120 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1121 } else { 1122 /* assemble the entire matrix onto first processor. 
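       This is only intended for small matrices; matrices with more than 1024 rows are
       refused further below unless -mat_ascii_output_large has been set explicitly.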
*/ 1123 Mat A; 1124 Mat_SeqAIJ *Aloc; 1125 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1126 MatScalar *a; 1127 1128 if (mat->rmap->N > 1024) { 1129 PetscTruth flg = PETSC_FALSE; 1130 1131 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1132 if (!flg) { 1133 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1134 } 1135 } 1136 1137 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1138 if (!rank) { 1139 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1140 } else { 1141 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1142 } 1143 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1144 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1145 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1146 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1147 1148 /* copy over the A part */ 1149 Aloc = (Mat_SeqAIJ*)aij->A->data; 1150 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1151 row = mat->rmap->rstart; 1152 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1153 for (i=0; i<m; i++) { 1154 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1155 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1156 } 1157 aj = Aloc->j; 1158 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1159 1160 /* copy over the B part */ 1161 Aloc = (Mat_SeqAIJ*)aij->B->data; 1162 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1163 row = mat->rmap->rstart; 1164 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1165 ct = cols; 1166 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1167 for (i=0; i<m; i++) { 1168 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1169 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1170 } 1171 ierr = PetscFree(ct);CHKERRQ(ierr); 1172 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1174 /* 1175 Everyone has to call to draw the matrix since the graphics waits are 1176 synchronized across all processors that share the PetscDraw object 1177 */ 1178 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1179 if (!rank) { 1180 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1181 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1184 ierr = MatDestroy(A);CHKERRQ(ierr); 1185 } 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatView_MPIAIJ" 1191 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1192 { 1193 PetscErrorCode ierr; 1194 PetscTruth iascii,isdraw,issocket,isbinary; 1195 1196 PetscFunctionBegin; 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1201 if (iascii || isdraw || isbinary || issocket) { 1202 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1203 } else 
{ 1204 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatSOR_MPIAIJ" 1211 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1212 { 1213 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1214 PetscErrorCode ierr; 1215 Vec bb1 = 0; 1216 PetscTruth hasop; 1217 1218 PetscFunctionBegin; 1219 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1220 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1221 } 1222 1223 if (flag == SOR_APPLY_UPPER) { 1224 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1229 if (flag & SOR_ZERO_INITIAL_GUESS) { 1230 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1231 its--; 1232 } 1233 1234 while (its--) { 1235 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1237 1238 /* update rhs: bb1 = bb - B*x */ 1239 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1240 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1241 1242 /* local sweep */ 1243 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1244 } 1245 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1246 if (flag & SOR_ZERO_INITIAL_GUESS) { 1247 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1248 its--; 1249 } 1250 while (its--) { 1251 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 1254 /* update rhs: bb1 = bb - B*x */ 1255 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1256 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1257 1258 /* local sweep */ 1259 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1260 } 1261 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1262 if (flag & SOR_ZERO_INITIAL_GUESS) { 1263 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1264 its--; 1265 } 1266 while (its--) { 1267 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 1270 /* update rhs: bb1 = bb - B*x */ 1271 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1272 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1273 1274 /* local sweep */ 1275 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1276 } 1277 } else if (flag & SOR_EISENSTAT) { 1278 Vec xx1; 1279 1280 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1281 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1282 1283 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 if (!mat->diag) { 1286 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1287 ierr = 
MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1288 } 1289 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1290 if (hasop) { 1291 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1292 } else { 1293 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1294 } 1295 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1296 1297 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1298 1299 /* local sweep */ 1300 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1301 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1302 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1303 } else { 1304 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1305 } 1306 1307 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatPermute_MPIAIJ" 1313 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1314 { 1315 MPI_Comm comm,pcomm; 1316 PetscInt first,local_size,nrows; 1317 const PetscInt *rows; 1318 PetscMPIInt size; 1319 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1324 /* make a collective version of 'rowp' */ 1325 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1326 if (pcomm==comm) { 1327 crowp = rowp; 1328 } else { 1329 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1330 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1331 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1332 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1333 } 1334 /* collect the global row permutation and invert it */ 1335 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1336 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1337 if (pcomm!=comm) { 1338 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1339 } 1340 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1341 /* get the local target indices */ 1342 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1344 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1345 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1346 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1347 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1348 /* the column permutation is so much easier; 1349 make a local version of 'colp' and invert it */ 1350 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1351 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1352 if (size==1) { 1353 lcolp = colp; 1354 } else { 1355 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1356 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1357 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1358 } 1359 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1360 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1361 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1362 if (size>1) { 1363 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1364 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1365 } 1366 /* now we just get the submatrix */ 1367 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1368 /* clean up */ 1369 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1370 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ 
"MatGetInfo_MPIAIJ" 1376 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1377 { 1378 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1379 Mat A = mat->A,B = mat->B; 1380 PetscErrorCode ierr; 1381 PetscReal isend[5],irecv[5]; 1382 1383 PetscFunctionBegin; 1384 info->block_size = 1.0; 1385 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1386 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1387 isend[3] = info->memory; isend[4] = info->mallocs; 1388 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1389 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1390 isend[3] += info->memory; isend[4] += info->mallocs; 1391 if (flag == MAT_LOCAL) { 1392 info->nz_used = isend[0]; 1393 info->nz_allocated = isend[1]; 1394 info->nz_unneeded = isend[2]; 1395 info->memory = isend[3]; 1396 info->mallocs = isend[4]; 1397 } else if (flag == MAT_GLOBAL_MAX) { 1398 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1399 info->nz_used = irecv[0]; 1400 info->nz_allocated = irecv[1]; 1401 info->nz_unneeded = irecv[2]; 1402 info->memory = irecv[3]; 1403 info->mallocs = irecv[4]; 1404 } else if (flag == MAT_GLOBAL_SUM) { 1405 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1406 info->nz_used = irecv[0]; 1407 info->nz_allocated = irecv[1]; 1408 info->nz_unneeded = irecv[2]; 1409 info->memory = irecv[3]; 1410 info->mallocs = irecv[4]; 1411 } 1412 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1413 info->fill_ratio_needed = 0; 1414 info->factor_mallocs = 0; 1415 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatSetOption_MPIAIJ" 1421 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1422 { 1423 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 switch (op) { 1428 case MAT_NEW_NONZERO_LOCATIONS: 1429 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1430 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1431 case MAT_KEEP_NONZERO_PATTERN: 1432 case MAT_NEW_NONZERO_LOCATION_ERR: 1433 case MAT_USE_INODES: 1434 case MAT_IGNORE_ZERO_ENTRIES: 1435 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1436 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1437 break; 1438 case MAT_ROW_ORIENTED: 1439 a->roworiented = flg; 1440 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1441 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1442 break; 1443 case MAT_NEW_DIAGONALS: 1444 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1445 break; 1446 case MAT_IGNORE_OFF_PROC_ENTRIES: 1447 a->donotstash = PETSC_TRUE; 1448 break; 1449 case MAT_SYMMETRIC: 1450 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1451 break; 1452 case MAT_STRUCTURALLY_SYMMETRIC: 1453 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1454 break; 1455 case MAT_HERMITIAN: 1456 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1457 break; 1458 case MAT_SYMMETRY_ETERNAL: 1459 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1460 break; 1461 default: 1462 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatGetRow_MPIAIJ" 1469 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1470 { 1471 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1472 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1473 PetscErrorCode ierr; 1474 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1475 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1476 PetscInt *cmap,*idx_p; 1477 1478 PetscFunctionBegin; 1479 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1480 mat->getrowactive = PETSC_TRUE; 1481 1482 if (!mat->rowvalues && (idx || v)) { 1483 /* 1484 allocate enough space to hold information from the longest row. 1485 */ 1486 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1487 PetscInt max = 1,tmp; 1488 for (i=0; i<matin->rmap->n; i++) { 1489 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1490 if (max < tmp) { max = tmp; } 1491 } 1492 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1493 } 1494 1495 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1496 lrow = row - rstart; 1497 1498 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1499 if (!v) {pvA = 0; pvB = 0;} 1500 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1501 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1502 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1503 nztot = nzA + nzB; 1504 1505 cmap = mat->garray; 1506 if (v || idx) { 1507 if (nztot) { 1508 /* Sort by increasing column numbers, assuming A and B already sorted */ 1509 PetscInt imark = -1; 1510 if (v) { 1511 *v = v_p = mat->rowvalues; 1512 for (i=0; i<nzB; i++) { 1513 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1514 else break; 1515 } 1516 imark = i; 1517 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1518 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1519 } 1520 if (idx) { 1521 *idx = idx_p = mat->rowindices; 1522 if (imark > -1) { 1523 for (i=0; i<imark; i++) { 1524 idx_p[i] = cmap[cworkB[i]]; 1525 } 1526 } else { 1527 for (i=0; i<nzB; i++) { 1528 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1529 else break; 1530 } 1531 imark = i; 1532 } 1533 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1534 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1535 } 1536 } else { 1537 if (idx) *idx = 0; 1538 if (v) *v = 0; 1539 } 1540 } 1541 *nz = nztot; 1542 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1543 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1549 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1550 { 1551 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1552 1553 PetscFunctionBegin; 1554 if (!aij->getrowactive) { 1555 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1556 } 1557 aij->getrowactive = PETSC_FALSE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatNorm_MPIAIJ" 1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1564 { 1565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1566 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,cstart = mat->cmap->rstart; 1569 PetscReal sum = 0.0; 1570 MatScalar *v; 1571 1572 PetscFunctionBegin; 1573 if (aij->size == 1) { 1574 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1575 } else { 1576 if (type == NORM_FROBENIUS) { 1577 v = amat->a; 1578 for (i=0; i<amat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 v = bmat->a; 1586 for (i=0; i<bmat->nz; i++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1589 #else 1590 sum += (*v)*(*v); v++; 1591 #endif 1592 } 1593 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1594 *norm = sqrt(*norm); 1595 } else if (type == NORM_1) { /* max column norm */ 1596 PetscReal *tmp,*tmp2; 1597 PetscInt *jj,*garray = aij->garray; 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1600 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1601 *norm = 0.0; 1602 v = amat->a; jj = amat->j; 1603 for (j=0; j<amat->nz; j++) { 1604 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1605 } 1606 v = bmat->a; jj = bmat->j; 1607 for (j=0; j<bmat->nz; j++) { 1608 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1609 } 1610 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1611 for (j=0; j<mat->cmap->N; j++) { 1612 if (tmp2[j] > *norm) *norm = tmp2[j]; 1613 } 1614 ierr = PetscFree(tmp);CHKERRQ(ierr); 1615 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1616 } else if (type == NORM_INFINITY) { /* max row norm */ 1617 PetscReal ntemp = 0.0; 1618 for (j=0; j<aij->A->rmap->n; j++) { 1619 v = amat->a + amat->i[j]; 1620 sum = 0.0; 1621 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1622 sum += PetscAbsScalar(*v); v++; 1623 } 1624 v = bmat->a + bmat->i[j]; 1625 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1626 sum += PetscAbsScalar(*v); v++; 1627 } 1628 if (sum > ntemp) ntemp = sum; 1629 } 1630 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1631 } else { 1632 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1633 } 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatTranspose_MPIAIJ" 1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1641 { 1642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1643 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1644 PetscErrorCode ierr; 1645 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1646 PetscInt cstart=A->cmap->rstart,ncol; 1647 Mat B; 1648 MatScalar *array; 1649 1650 PetscFunctionBegin; 1651 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1652 1653 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1654 ai = Aloc->i; aj = Aloc->j; 1655 bi = Bloc->i; bj = Bloc->j; 1656 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1657 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1658 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1659 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1660 for (i=0; i<ai[ma]; i++){ 1661 d_nnz[aj[i]] ++; 1662 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1663 } 1664 1665 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1666 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1667 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1670 } else { 1671 B = *matout; 1672 } 1673 1674 /* copy 
over the A part */ 1675 array = Aloc->a; 1676 row = A->rmap->rstart; 1677 for (i=0; i<ma; i++) { 1678 ncol = ai[i+1]-ai[i]; 1679 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1680 row++; array += ncol; aj += ncol; 1681 } 1682 aj = Aloc->j; 1683 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1684 1685 /* copy over the B part */ 1686 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1687 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1688 array = Bloc->a; 1689 row = A->rmap->rstart; 1690 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1691 cols_tmp = cols; 1692 for (i=0; i<mb; i++) { 1693 ncol = bi[i+1]-bi[i]; 1694 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; array += ncol; cols_tmp += ncol; 1696 } 1697 ierr = PetscFree(cols);CHKERRQ(ierr); 1698 1699 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1702 *matout = B; 1703 } else { 1704 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1714 Mat a = aij->A,b = aij->B; 1715 PetscErrorCode ierr; 1716 PetscInt s1,s2,s3; 1717 1718 PetscFunctionBegin; 1719 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1720 if (rr) { 1721 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1722 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1723 /* Overlap communication with computation. */ 1724 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 if (ll) { 1727 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1728 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1729 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1730 } 1731 /* scale the diagonal block */ 1732 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1733 1734 if (rr) { 1735 /* Do a scatter end and then right scale the off-diagonal block */ 1736 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1738 } 1739 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1746 { 1747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1752 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1760 { 1761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1762 PetscErrorCode ierr; 1763 1764 PetscFunctionBegin; 1765 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatEqual_MPIAIJ" 1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1772 { 1773 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1774 Mat a,b,c,d; 1775 
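/* comparison sketch (descriptive note, not from the original source): the two MPIAIJ matrices are compared block-wise below, first the local diagonal blocks (A), then the local off-diagonal blocks (B); the per-process results are then combined with a logical AND reduction so that every process returns the same PetscTruth */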
PetscTruth flg; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 a = matA->A; b = matA->B; 1780 c = matB->A; d = matB->B; 1781 1782 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1783 if (flg) { 1784 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1785 } 1786 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatCopy_MPIAIJ" 1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1793 { 1794 PetscErrorCode ierr; 1795 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1796 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 /* because of the column compression in the off-processor part of the matrix a->B, 1802 the number of columns in a->B and b->B may be different, hence we cannot call 1803 the MatCopy() directly on the two parts. If need be, we can provide a more 1804 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1805 then copying the submatrices */ 1806 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1807 } else { 1808 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1809 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1817 { 1818 PetscErrorCode ierr; 1819 1820 PetscFunctionBegin; 1821 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include <boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map.hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1967 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1968 ierr = PetscObjectCompose((PetscObject) (fact), "graph",
(PetscObject) c); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = 
PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for (l=0; l<nzB; l++) { 2227 if 
((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 /*----------------------------------------*/ 2303 for (imdex=0; 
imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 
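/* descriptive note (not in the original source): the communication pattern and buffers stashed here are exactly what the MAT_REUSE_MATRIX branch at the top of this routine retrieves, so a repeated call can skip the index exchange and only re-send the numerical values */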
2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->cmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = 
MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 
MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 0 2719 }; 2720 2721 /* ----------------------------------------------------------------------------------------*/ 2722 2723 EXTERN_C_BEGIN 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 EXTERN_C_END 2737 2738 EXTERN_C_BEGIN 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2742 { 2743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2748 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 EXTERN_C_END 2752 2753 #include "petscpc.h" 2754 EXTERN_C_BEGIN 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2758 { 2759 Mat_MPIAIJ *b; 2760 PetscErrorCode ierr; 2761 PetscInt i; 2762 2763 PetscFunctionBegin; 2764 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2765 if (o_nz == PETSC_DEFAULT || o_nz 
== PETSC_DECIDE) o_nz = 2; 2766 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2767 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2768 2769 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2773 if (d_nnz) { 2774 for (i=0; i<B->rmap->n; i++) { 2775 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2776 } 2777 } 2778 if (o_nnz) { 2779 for (i=0; i<B->rmap->n; i++) { 2780 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2781 } 2782 } 2783 b = (Mat_MPIAIJ*)B->data; 2784 2785 if (!B->preallocated) { 2786 /* Explicitly create 2 MATSEQAIJ matrices. */ 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2789 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2791 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2792 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2793 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2795 } 2796 2797 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2798 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2799 B->preallocated = PETSC_TRUE; 2800 PetscFunctionReturn(0); 2801 } 2802 EXTERN_C_END 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2806 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2807 { 2808 Mat mat; 2809 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 *newmat = 0; 2814 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2815 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2816 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2817 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2818 a = (Mat_MPIAIJ*)mat->data; 2819 2820 mat->factor = matin->factor; 2821 mat->rmap->bs = matin->rmap->bs; 2822 mat->assembled = PETSC_TRUE; 2823 mat->insertmode = NOT_SET_VALUES; 2824 mat->preallocated = PETSC_TRUE; 2825 2826 a->size = oldmat->size; 2827 a->rank = oldmat->rank; 2828 a->donotstash = oldmat->donotstash; 2829 a->roworiented = oldmat->roworiented; 2830 a->rowindices = 0; 2831 a->rowvalues = 0; 2832 a->getrowactive = PETSC_FALSE; 2833 2834 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2835 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2836 2837 if (oldmat->colmap) { 2838 #if defined (PETSC_USE_CTABLE) 2839 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2840 #else 2841 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2842 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2843 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2844 #endif 2845 } else a->colmap = 0; 2846 if (oldmat->garray) { 2847 PetscInt len; 2848 len = oldmat->B->cmap->n; 2849 ierr = 
PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2850 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2851 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2852 } else a->garray = 0; 2853 2854 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2856 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2858 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2860 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2862 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2863 *newmat = mat; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatLoad_MPIAIJ" 2869 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2870 { 2871 Mat A; 2872 PetscScalar *vals,*svals; 2873 MPI_Comm comm = ((PetscObject)viewer)->comm; 2874 MPI_Status status; 2875 PetscErrorCode ierr; 2876 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2877 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2878 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2879 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2880 PetscInt cend,cstart,n,*rowners; 2881 int fd; 2882 2883 PetscFunctionBegin; 2884 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2885 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2886 if (!rank) { 2887 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2888 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2889 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2890 } 2891 2892 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2893 M = header[1]; N = header[2]; 2894 /* determine ownership of all rows */ 2895 m = M/size + ((M % size) > rank); 2896 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2897 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2898 2899 /* First process needs enough room for process with most rows */ 2900 if (!rank) { 2901 mmax = rowners[1]; 2902 for (i=2; i<size; i++) { 2903 mmax = PetscMax(mmax,rowners[i]); 2904 } 2905 } else mmax = m; 2906 2907 rowners[0] = 0; 2908 for (i=2; i<=size; i++) { 2909 rowners[i] += rowners[i-1]; 2910 } 2911 rstart = rowners[rank]; 2912 rend = rowners[rank+1]; 2913 2914 /* distribute row lengths to all processors */ 2915 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2916 if (!rank) { 2917 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2918 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2919 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2920 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2921 for (j=0; j<m; j++) { 2922 procsnz[0] += ourlens[j]; 2923 } 2924 for (i=1; i<size; i++) { 2925 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2926 /* calculate the number of nonzeros on each processor */ 2927 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2928 procsnz[i] += rowlengths[j]; 2929 } 2930 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2931 ierr = 
MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2932 } 2933 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2934 } else { 2935 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2936 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2937 } 2938 2939 if (!rank) { 2940 /* determine max buffer needed and allocate it */ 2941 maxnz = 0; 2942 for (i=0; i<size; i++) { 2943 maxnz = PetscMax(maxnz,procsnz[i]); 2944 } 2945 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2946 2947 /* read in my part of the matrix column indices */ 2948 nz = procsnz[0]; 2949 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2950 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2951 2952 /* read in every one elses and ship off */ 2953 for (i=1; i<size; i++) { 2954 nz = procsnz[i]; 2955 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2956 mpicnt = PetscMPIIntCast(nz); 2957 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2958 } 2959 ierr = PetscFree(cols);CHKERRQ(ierr); 2960 } else { 2961 /* determine buffer space needed for message */ 2962 nz = 0; 2963 for (i=0; i<m; i++) { 2964 nz += ourlens[i]; 2965 } 2966 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2967 2968 /* receive message of column indices*/ 2969 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2970 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2971 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2972 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2973 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2974 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2975 } 2976 2977 /* determine column ownership if matrix is not square */ 2978 if (N != M) { 2979 n = N/size + ((N % size) > rank); 2980 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2981 cstart = cend - n; 2982 } else { 2983 cstart = rstart; 2984 cend = rend; 2985 n = cend - cstart; 2986 } 2987 2988 /* loop over local rows, determining number of off diagonal entries */ 2989 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2990 jj = 0; 2991 for (i=0; i<m; i++) { 2992 for (j=0; j<ourlens[i]; j++) { 2993 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2994 jj++; 2995 } 2996 } 2997 2998 /* create our matrix */ 2999 for (i=0; i<m; i++) { 3000 ourlens[i] -= offlens[i]; 3001 } 3002 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3003 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3004 ierr = MatSetType(A,type);CHKERRQ(ierr); 3005 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3006 3007 for (i=0; i<m; i++) { 3008 ourlens[i] += offlens[i]; 3009 } 3010 3011 if (!rank) { 3012 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3013 3014 /* read in my part of the matrix numerical values */ 3015 nz = procsnz[0]; 3016 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3017 3018 /* insert into matrix */ 3019 jj = rstart; 3020 smycols = mycols; 3021 svals = vals; 3022 for (i=0; i<m; i++) { 3023 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3024 smycols += ourlens[i]; 3025 svals += ourlens[i]; 3026 jj++; 3027 } 3028 3029 /* read in other processors and ship out */ 3030 for (i=1; i<size; i++) { 3031 
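/* descriptive note (not in the original source): only the numerical values are shipped in this pass; the column indices for each block were already distributed above, and sender and receiver match on ((PetscObject)A)->tag */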
nz = procsnz[i]; 3032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3033 mpicnt = PetscMPIIntCast(nz); 3034 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3035 } 3036 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3037 } else { 3038 /* receive numeric values */ 3039 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3040 3041 /* receive message of values*/ 3042 mpicnt = PetscMPIIntCast(nz); 3043 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3044 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3045 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3046 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3047 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3048 3049 /* insert into matrix */ 3050 jj = rstart; 3051 smycols = mycols; 3052 svals = vals; 3053 for (i=0; i<m; i++) { 3054 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3055 smycols += ourlens[i]; 3056 svals += ourlens[i]; 3057 jj++; 3058 } 3059 } 3060 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3061 ierr = PetscFree(vals);CHKERRQ(ierr); 3062 ierr = PetscFree(mycols);CHKERRQ(ierr); 3063 ierr = PetscFree(rowners);CHKERRQ(ierr); 3064 3065 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 *newmat = A; 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3074 { 3075 PetscErrorCode ierr; 3076 IS iscol_local; 3077 PetscInt csize; 3078 3079 PetscFunctionBegin; 3080 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3081 if (call == MAT_REUSE_MATRIX) { 3082 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3083 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3084 } else { 3085 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3086 } 3087 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3088 if (call == MAT_INITIAL_MATRIX) { 3089 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3090 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3091 } 3092 PetscFunctionReturn(0); 3093 } 3094 3095 #undef __FUNCT__ 3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3097 /* 3098 Not great since it makes two copies of the submatrix, first an SeqAIJ 3099 in local and then by concatenating the local matrices the end result. 3100 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3101 3102 Note: This requires a sequential iscol with all indices. 
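   A minimal calling sketch (illustrative only, mirroring what MatGetSubMatrix_MPIAIJ() above does
   before delegating here; iscol_all and newB are hypothetical names):

       IS  iscol_all;
       Mat newB;
       ierr = ISAllGather(iscol,&iscol_all);CHKERRQ(ierr);    every process gets the full column index set
       ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_all,PETSC_DECIDE,MAT_INITIAL_MATRIX,&newB);CHKERRQ(ierr);
       ierr = ISDestroy(iscol_all);CHKERRQ(ierr);

   Passing PETSC_DECIDE for csize lets the routine choose the local column layout of the result.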
3103 */ 3104 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3105 { 3106 PetscErrorCode ierr; 3107 PetscMPIInt rank,size; 3108 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3109 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3110 Mat *local,M,Mreuse; 3111 MatScalar *vwork,*aa; 3112 MPI_Comm comm = ((PetscObject)mat)->comm; 3113 Mat_SeqAIJ *aij; 3114 3115 3116 PetscFunctionBegin; 3117 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3118 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3119 3120 if (call == MAT_REUSE_MATRIX) { 3121 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3122 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3123 local = &Mreuse; 3124 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3125 } else { 3126 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3127 Mreuse = *local; 3128 ierr = PetscFree(local);CHKERRQ(ierr); 3129 } 3130 3131 /* 3132 m - number of local rows 3133 n - number of columns (same on all processors) 3134 rstart - first row in new global matrix generated 3135 */ 3136 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3137 if (call == MAT_INITIAL_MATRIX) { 3138 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3139 ii = aij->i; 3140 jj = aij->j; 3141 3142 /* 3143 Determine the number of non-zeros in the diagonal and off-diagonal 3144 portions of the matrix in order to do correct preallocation 3145 */ 3146 3147 /* first get start and end of "diagonal" columns */ 3148 if (csize == PETSC_DECIDE) { 3149 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3150 if (mglobal == n) { /* square matrix */ 3151 nlocal = m; 3152 } else { 3153 nlocal = n/size + ((n % size) > rank); 3154 } 3155 } else { 3156 nlocal = csize; 3157 } 3158 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3159 rstart = rend - nlocal; 3160 if (rank == size - 1 && rend != n) { 3161 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3162 } 3163 3164 /* next, compute all the lengths */ 3165 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3166 olens = dlens + m; 3167 for (i=0; i<m; i++) { 3168 jend = ii[i+1] - ii[i]; 3169 olen = 0; 3170 dlen = 0; 3171 for (j=0; j<jend; j++) { 3172 if (*jj < rstart || *jj >= rend) olen++; 3173 else dlen++; 3174 jj++; 3175 } 3176 olens[i] = olen; 3177 dlens[i] = dlen; 3178 } 3179 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3180 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3181 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3182 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3183 ierr = PetscFree(dlens);CHKERRQ(ierr); 3184 } else { 3185 PetscInt ml,nl; 3186 3187 M = *newmat; 3188 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3189 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3190 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3191 /* 3192 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3193 rather than the slower MatSetValues(). 
 */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  /* sanity check the local CSR structure in debug builds */
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  /* count the diagonal and off-diagonal nonzeros in each local row for preallocation */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3283 } 3284 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3285 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 3287 if (!v) { 3288 ierr = PetscFree(values);CHKERRQ(ierr); 3289 } 3290 PetscFunctionReturn(0); 3291 } 3292 EXTERN_C_END 3293 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3296 /*@ 3297 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3298 (the default parallel PETSc format). 3299 3300 Collective on MPI_Comm 3301 3302 Input Parameters: 3303 + B - the matrix 3304 . i - the indices into j for the start of each local row (starts with zero) 3305 . j - the column indices for each local row (starts with zero) 3306 - v - optional values in the matrix 3307 3308 Level: developer 3309 3310 Notes: 3311 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3312 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3313 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3314 3315 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3316 3317 The format which is used for the sparse matrix input, is equivalent to a 3318 row-major ordering.. i.e for the following matrix, the input data expected is 3319 as shown: 3320 3321 1 0 0 3322 2 0 3 P0 3323 ------- 3324 4 5 6 P1 3325 3326 Process0 [P0]: rows_owned=[0,1] 3327 i = {0,1,3} [size = nrow+1 = 2+1] 3328 j = {0,0,2} [size = nz = 6] 3329 v = {1,2,3} [size = nz = 6] 3330 3331 Process1 [P1]: rows_owned=[2] 3332 i = {0,3} [size = nrow+1 = 1+1] 3333 j = {0,1,2} [size = nz = 6] 3334 v = {4,5,6} [size = nz = 6] 3335 3336 .keywords: matrix, aij, compressed row, sparse, parallel 3337 3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3339 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3340 @*/ 3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3342 { 3343 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3344 3345 PetscFunctionBegin; 3346 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3347 if (f) { 3348 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3349 } 3350 PetscFunctionReturn(0); 3351 } 3352 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3355 /*@C 3356 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3357 (the default parallel PETSc format). For good matrix assembly performance 3358 the user should preallocate the matrix storage by setting the parameters 3359 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3360 performance can be increased by more than a factor of 50. 3361 3362 Collective on MPI_Comm 3363 3364 Input Parameters: 3365 + A - the matrix 3366 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3367 (same value is used for all local rows) 3368 . d_nnz - array containing the number of nonzeros in the various rows of the 3369 DIAGONAL portion of the local submatrix (possibly different for each row) 3370 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3371 The size of this array is equal to the number of local rows, i.e 'm'. 
3372 You must leave room for the diagonal entry even if it is zero. 3373 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3374 submatrix (same value is used for all local rows). 3375 - o_nnz - array containing the number of nonzeros in the various rows of the 3376 OFF-DIAGONAL portion of the local submatrix (possibly different for 3377 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3378 structure. The size of this array is equal to the number 3379 of local rows, i.e 'm'. 3380 3381 If the *_nnz parameter is given then the *_nz parameter is ignored 3382 3383 The AIJ format (also called the Yale sparse matrix format or 3384 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3385 storage. The stored row and column indices begin with zero. See the users manual for details. 3386 3387 The parallel matrix is partitioned such that the first m0 rows belong to 3388 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3389 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3390 3391 The DIAGONAL portion of the local submatrix of a processor can be defined 3392 as the submatrix which is obtained by extraction the part corresponding 3393 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3394 first row that belongs to the processor, and r2 is the last row belonging 3395 to the this processor. This is a square mxm matrix. The remaining portion 3396 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3397 3398 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3399 3400 You can call MatGetInfo() to get information on how effective the preallocation was; 3401 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3402 You can also run with the option -info and look for messages with the string 3403 malloc in them to see if additional memory allocation was needed. 3404 3405 Example usage: 3406 3407 Consider the following 8x8 matrix with 34 non-zero values, that is 3408 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3409 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3410 as follows: 3411 3412 .vb 3413 1 2 0 | 0 3 0 | 0 4 3414 Proc0 0 5 6 | 7 0 0 | 8 0 3415 9 0 10 | 11 0 0 | 12 0 3416 ------------------------------------- 3417 13 0 14 | 15 16 17 | 0 0 3418 Proc1 0 18 0 | 19 20 21 | 0 0 3419 0 0 0 | 22 23 0 | 24 0 3420 ------------------------------------- 3421 Proc2 25 26 27 | 0 0 28 | 29 0 3422 30 0 0 | 31 32 33 | 0 34 3423 .ve 3424 3425 This can be represented as a collection of submatrices as: 3426 3427 .vb 3428 A B C 3429 D E F 3430 G H I 3431 .ve 3432 3433 Where the submatrices A,B,C are owned by proc0, D,E,F are 3434 owned by proc1, G,H,I are owned by proc2. 3435 3436 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3437 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3438 The 'M','N' parameters are 8,8, and have the same values on all procs. 3439 3440 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3441 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3442 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3443 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3444 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3445 matrix, ans [DF] as another SeqAIJ matrix. 
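   A typical call sequence is sketched below; the d_nnz and o_nnz arrays (or the scalar
   d_nz/o_nz values) are chosen by the application, for example as in the 8x8 case that
   follows:
.vb
      MatCreate(comm,&A);
      MatSetType(A,MATMPIAIJ);
      MatSetSizes(A,m,n,M,N);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve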
3446 3447 When d_nz, o_nz parameters are specified, d_nz storage elements are 3448 allocated for every row of the local diagonal submatrix, and o_nz 3449 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3450 One way to choose d_nz and o_nz is to use the max nonzerors per local 3451 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3452 In this case, the values of d_nz,o_nz are: 3453 .vb 3454 proc0 : dnz = 2, o_nz = 2 3455 proc1 : dnz = 3, o_nz = 2 3456 proc2 : dnz = 1, o_nz = 4 3457 .ve 3458 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3459 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3460 for proc3. i.e we are using 12+15+10=37 storage locations to store 3461 34 values. 3462 3463 When d_nnz, o_nnz parameters are specified, the storage is specified 3464 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3465 In the above case the values for d_nnz,o_nnz are: 3466 .vb 3467 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3468 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3469 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3470 .ve 3471 Here the space allocated is sum of all the above values i.e 34, and 3472 hence pre-allocation is perfect. 3473 3474 Level: intermediate 3475 3476 .keywords: matrix, aij, compressed row, sparse, parallel 3477 3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3479 MPIAIJ, MatGetInfo() 3480 @*/ 3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3482 { 3483 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3484 3485 PetscFunctionBegin; 3486 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3487 if (f) { 3488 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3489 } 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3495 /*@ 3496 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3497 CSR format the local rows. 3498 3499 Collective on MPI_Comm 3500 3501 Input Parameters: 3502 + comm - MPI communicator 3503 . m - number of local rows (Cannot be PETSC_DECIDE) 3504 . n - This value should be the same as the local size used in creating the 3505 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3506 calculated if N is given) For square matrices n is almost always m. 3507 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3508 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3509 . i - row indices 3510 . j - column indices 3511 - a - matrix values 3512 3513 Output Parameter: 3514 . mat - the matrix 3515 3516 Level: intermediate 3517 3518 Notes: 3519 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3520 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3521 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3522 3523 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3524 3525 The format which is used for the sparse matrix input, is equivalent to a 3526 row-major ordering.. 
i.e for the following matrix, the input data expected is 3527 as shown: 3528 3529 1 0 0 3530 2 0 3 P0 3531 ------- 3532 4 5 6 P1 3533 3534 Process0 [P0]: rows_owned=[0,1] 3535 i = {0,1,3} [size = nrow+1 = 2+1] 3536 j = {0,0,2} [size = nz = 6] 3537 v = {1,2,3} [size = nz = 6] 3538 3539 Process1 [P1]: rows_owned=[2] 3540 i = {0,3} [size = nrow+1 = 1+1] 3541 j = {0,1,2} [size = nz = 6] 3542 v = {4,5,6} [size = nz = 6] 3543 3544 .keywords: matrix, aij, compressed row, sparse, parallel 3545 3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3547 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3548 @*/ 3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3550 { 3551 PetscErrorCode ierr; 3552 3553 PetscFunctionBegin; 3554 if (i[0]) { 3555 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3556 } 3557 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3558 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3560 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3561 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatCreateMPIAIJ" 3567 /*@C 3568 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3569 (the default parallel PETSc format). For good matrix assembly performance 3570 the user should preallocate the matrix storage by setting the parameters 3571 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3572 performance can be increased by more than a factor of 50. 3573 3574 Collective on MPI_Comm 3575 3576 Input Parameters: 3577 + comm - MPI communicator 3578 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3579 This value should be the same as the local size used in creating the 3580 y vector for the matrix-vector product y = Ax. 3581 . n - This value should be the same as the local size used in creating the 3582 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3583 calculated if N is given) For square matrices n is almost always m. 3584 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3585 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3586 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3587 (same value is used for all local rows) 3588 . d_nnz - array containing the number of nonzeros in the various rows of the 3589 DIAGONAL portion of the local submatrix (possibly different for each row) 3590 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3591 The size of this array is equal to the number of local rows, i.e 'm'. 3592 You must leave room for the diagonal entry even if it is zero. 3593 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3594 submatrix (same value is used for all local rows). 3595 - o_nnz - array containing the number of nonzeros in the various rows of the 3596 OFF-DIAGONAL portion of the local submatrix (possibly different for 3597 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3598 structure. The size of this array is equal to the number 3599 of local rows, i.e 'm'. 
3600 3601 Output Parameter: 3602 . A - the matrix 3603 3604 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3605 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3606 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3607 3608 Notes: 3609 If the *_nnz parameter is given then the *_nz parameter is ignored 3610 3611 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3612 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3613 storage requirements for this matrix. 3614 3615 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3616 processor than it must be used on all processors that share the object for 3617 that argument. 3618 3619 The user MUST specify either the local or global matrix dimensions 3620 (possibly both). 3621 3622 The parallel matrix is partitioned across processors such that the 3623 first m0 rows belong to process 0, the next m1 rows belong to 3624 process 1, the next m2 rows belong to process 2 etc.. where 3625 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3626 values corresponding to [m x N] submatrix. 3627 3628 The columns are logically partitioned with the n0 columns belonging 3629 to 0th partition, the next n1 columns belonging to the next 3630 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3631 3632 The DIAGONAL portion of the local submatrix on any given processor 3633 is the submatrix corresponding to the rows and columns m,n 3634 corresponding to the given processor. i.e diagonal matrix on 3635 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3636 etc. The remaining portion of the local submatrix [m x (N-n)] 3637 constitute the OFF-DIAGONAL portion. The example below better 3638 illustrates this concept. 3639 3640 For a square global matrix we define each processor's diagonal portion 3641 to be its local rows and the corresponding columns (a square submatrix); 3642 each processor's off-diagonal portion encompasses the remainder of the 3643 local matrix (a rectangular submatrix). 3644 3645 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3646 3647 When calling this routine with a single process communicator, a matrix of 3648 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3649 type of communicator, use the construction mechanism: 3650 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3651 3652 By default, this format uses inodes (identical nodes) when possible. 3653 We search for consecutive rows with the same nonzero structure, thereby 3654 reusing matrix information to achieve increased efficiency. 3655 3656 Options Database Keys: 3657 + -mat_no_inode - Do not use inodes 3658 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3659 - -mat_aij_oneindex - Internally use indexing starting at 1 3660 rather than 0. Note that when calling MatSetValues(), 3661 the user still MUST index entries starting at 0! 3662 3663 3664 Example usage: 3665 3666 Consider the following 8x8 matrix with 34 non-zero values, that is 3667 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3668 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3669 as follows: 3670 3671 .vb 3672 1 2 0 | 0 3 0 | 0 4 3673 Proc0 0 5 6 | 7 0 0 | 8 0 3674 9 0 10 | 11 0 0 | 12 0 3675 ------------------------------------- 3676 13 0 14 | 15 16 17 | 0 0 3677 Proc1 0 18 0 | 19 20 21 | 0 0 3678 0 0 0 | 22 23 0 | 24 0 3679 ------------------------------------- 3680 Proc2 25 26 27 | 0 0 28 | 29 0 3681 30 0 0 | 31 32 33 | 0 34 3682 .ve 3683 3684 This can be represented as a collection of submatrices as: 3685 3686 .vb 3687 A B C 3688 D E F 3689 G H I 3690 .ve 3691 3692 Where the submatrices A,B,C are owned by proc0, D,E,F are 3693 owned by proc1, G,H,I are owned by proc2. 3694 3695 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3696 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3697 The 'M','N' parameters are 8,8, and have the same values on all procs. 3698 3699 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3700 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3701 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3702 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3703 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3704 matrix, ans [DF] as another SeqAIJ matrix. 3705 3706 When d_nz, o_nz parameters are specified, d_nz storage elements are 3707 allocated for every row of the local diagonal submatrix, and o_nz 3708 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3709 One way to choose d_nz and o_nz is to use the max nonzerors per local 3710 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3711 In this case, the values of d_nz,o_nz are: 3712 .vb 3713 proc0 : dnz = 2, o_nz = 2 3714 proc1 : dnz = 3, o_nz = 2 3715 proc2 : dnz = 1, o_nz = 4 3716 .ve 3717 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3718 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3719 for proc3. i.e we are using 12+15+10=37 storage locations to store 3720 34 values. 3721 3722 When d_nnz, o_nnz parameters are specified, the storage is specified 3723 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3724 In the above case the values for d_nnz,o_nnz are: 3725 .vb 3726 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3727 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3728 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3729 .ve 3730 Here the space allocated is sum of all the above values i.e 34, and 3731 hence pre-allocation is perfect. 
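   For instance, using the values above, proc1 would participate in the (collective) call
   sketched below, each process passing its own local sizes and nonzero counts:
.vb
      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve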
3732 3733 Level: intermediate 3734 3735 .keywords: matrix, aij, compressed row, sparse, parallel 3736 3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3738 MPIAIJ, MatCreateMPIAIJWithArrays() 3739 @*/ 3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3741 { 3742 PetscErrorCode ierr; 3743 PetscMPIInt size; 3744 3745 PetscFunctionBegin; 3746 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3747 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3748 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3749 if (size > 1) { 3750 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3751 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3752 } else { 3753 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3754 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3755 } 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3762 { 3763 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3764 3765 PetscFunctionBegin; 3766 *Ad = a->A; 3767 *Ao = a->B; 3768 *colmap = a->garray; 3769 PetscFunctionReturn(0); 3770 } 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3775 { 3776 PetscErrorCode ierr; 3777 PetscInt i; 3778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3779 3780 PetscFunctionBegin; 3781 if (coloring->ctype == IS_COLORING_GLOBAL) { 3782 ISColoringValue *allcolors,*colors; 3783 ISColoring ocoloring; 3784 3785 /* set coloring for diagonal portion */ 3786 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3787 3788 /* set coloring for off-diagonal portion */ 3789 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3790 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3791 for (i=0; i<a->B->cmap->n; i++) { 3792 colors[i] = allcolors[a->garray[i]]; 3793 } 3794 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3795 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3796 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3797 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3798 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3799 ISColoringValue *colors; 3800 PetscInt *larray; 3801 ISColoring ocoloring; 3802 3803 /* set coloring for diagonal portion */ 3804 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3805 for (i=0; i<a->A->cmap->n; i++) { 3806 larray[i] = i + A->cmap->rstart; 3807 } 3808 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3809 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3810 for (i=0; i<a->A->cmap->n; i++) { 3811 colors[i] = coloring->colors[larray[i]]; 3812 } 3813 ierr = PetscFree(larray);CHKERRQ(ierr); 3814 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3815 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3816 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3817 3818 /* set coloring for off-diagonal portion */ 3819 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3820 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3821 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3822 for (i=0; i<a->B->cmap->n; i++) { 3823 colors[i] = coloring->colors[larray[i]]; 3824 } 3825 ierr = PetscFree(larray);CHKERRQ(ierr); 3826 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3827 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3828 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3829 } else { 3830 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3831 } 3832 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #if defined(PETSC_HAVE_ADIC) 3837 #undef __FUNCT__ 3838 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3839 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3840 { 3841 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3842 PetscErrorCode ierr; 3843 3844 PetscFunctionBegin; 3845 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3846 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3847 PetscFunctionReturn(0); 3848 } 3849 #endif 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3854 { 3855 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3856 PetscErrorCode ierr; 3857 3858 PetscFunctionBegin; 3859 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3860 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3861 PetscFunctionReturn(0); 3862 } 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatMerge" 3866 /*@ 3867 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3868 matrices from each processor 3869 3870 Collective on MPI_Comm 3871 3872 Input Parameters: 3873 + comm - the communicators the parallel matrix will live on 3874 . inmat - the input sequential matrices 3875 . n - number of local columns (or PETSC_DECIDE) 3876 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3877 3878 Output Parameter: 3879 . outmat - the parallel matrix generated 3880 3881 Level: advanced 3882 3883 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
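   Note that this routine calls MatDestroy() on inmat once its values have been inserted
   into outmat, so the caller should not destroy inmat again.

   A minimal usage sketch, assuming each process already holds its sequential matrix inmat
   (error checking omitted):
.vb
      Mat outmat;
      MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve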
3884 3885 @*/ 3886 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3887 { 3888 PetscErrorCode ierr; 3889 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3890 PetscInt *indx; 3891 PetscScalar *values; 3892 3893 PetscFunctionBegin; 3894 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3895 if (scall == MAT_INITIAL_MATRIX){ 3896 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3897 if (n == PETSC_DECIDE){ 3898 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3899 } 3900 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3901 rstart -= m; 3902 3903 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3904 for (i=0;i<m;i++) { 3905 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3906 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3907 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3908 } 3909 /* This routine will ONLY return MPIAIJ type matrix */ 3910 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3911 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3912 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3913 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3914 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3915 3916 } else if (scall == MAT_REUSE_MATRIX){ 3917 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3918 } else { 3919 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3920 } 3921 3922 for (i=0;i<m;i++) { 3923 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3924 Ii = i + rstart; 3925 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3926 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3927 } 3928 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3929 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3930 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 3932 PetscFunctionReturn(0); 3933 } 3934 3935 #undef __FUNCT__ 3936 #define __FUNCT__ "MatFileSplit" 3937 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3938 { 3939 PetscErrorCode ierr; 3940 PetscMPIInt rank; 3941 PetscInt m,N,i,rstart,nnz; 3942 size_t len; 3943 const PetscInt *indx; 3944 PetscViewer out; 3945 char *name; 3946 Mat B; 3947 const PetscScalar *values; 3948 3949 PetscFunctionBegin; 3950 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3951 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3952 /* Should this be the type of the diagonal block of A? 
*/ 3953 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3954 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3955 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3956 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3957 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3958 for (i=0;i<m;i++) { 3959 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3960 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3961 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3962 } 3963 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3964 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3965 3966 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3967 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3968 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3969 sprintf(name,"%s.%d",outfile,rank); 3970 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3971 ierr = PetscFree(name); 3972 ierr = MatView(B,out);CHKERRQ(ierr); 3973 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3974 ierr = MatDestroy(B);CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977 3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3979 #undef __FUNCT__ 3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3981 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3982 { 3983 PetscErrorCode ierr; 3984 Mat_Merge_SeqsToMPI *merge; 3985 PetscContainer container; 3986 3987 PetscFunctionBegin; 3988 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3989 if (container) { 3990 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4003 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4004 4005 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4006 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4007 } 4008 ierr = PetscFree(merge);CHKERRQ(ierr); 4009 4010 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4011 PetscFunctionReturn(0); 4012 } 4013 4014 #include "../src/mat/utils/freespace.h" 4015 #include "petscbt.h" 4016 4017 #undef __FUNCT__ 4018 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4019 /*@C 4020 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4021 matrices from each processor 4022 4023 Collective on MPI_Comm 4024 4025 Input Parameters: 4026 + comm - the communicators the parallel matrix will live on 4027 . seqmat - the input sequential matrices 4028 . m - number of local rows (or PETSC_DECIDE) 4029 . n - number of local columns (or PETSC_DECIDE) 4030 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4031 4032 Output Parameter: 4033 . mpimat - the parallel matrix generated 4034 4035 Level: advanced 4036 4037 Notes: 4038 The dimensions of the sequential matrix in each processor MUST be the same. 
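   A minimal sketch of the usual entry point, the driver MatMerge_SeqsToMPI(), which runs
   the symbolic phase (for MAT_INITIAL_MATRIX) followed by this numeric phase:
.vb
      Mat mpimat;
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
.ve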
4039 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4040 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4041 @*/ 4042 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4043 { 4044 PetscErrorCode ierr; 4045 MPI_Comm comm=((PetscObject)mpimat)->comm; 4046 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4047 PetscMPIInt size,rank,taga,*len_s; 4048 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4049 PetscInt proc,m; 4050 PetscInt **buf_ri,**buf_rj; 4051 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4052 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4053 MPI_Request *s_waits,*r_waits; 4054 MPI_Status *status; 4055 MatScalar *aa=a->a; 4056 MatScalar **abuf_r,*ba_i; 4057 Mat_Merge_SeqsToMPI *merge; 4058 PetscContainer container; 4059 4060 PetscFunctionBegin; 4061 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4062 4063 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4064 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4065 4066 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4067 if (container) { 4068 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4069 } 4070 bi = merge->bi; 4071 bj = merge->bj; 4072 buf_ri = merge->buf_ri; 4073 buf_rj = merge->buf_rj; 4074 4075 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4076 owners = merge->rowmap->range; 4077 len_s = merge->len_s; 4078 4079 /* send and recv matrix values */ 4080 /*-----------------------------*/ 4081 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4082 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4083 4084 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4085 for (proc=0,k=0; proc<size; proc++){ 4086 if (!len_s[proc]) continue; 4087 i = owners[proc]; 4088 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4089 k++; 4090 } 4091 4092 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4093 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4094 ierr = PetscFree(status);CHKERRQ(ierr); 4095 4096 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4097 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4098 4099 /* insert mat values of mpimat */ 4100 /*----------------------------*/ 4101 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4102 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4103 4104 for (k=0; k<merge->nrecv; k++){ 4105 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4106 nrows = *(buf_ri_k[k]); 4107 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4108 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4109 } 4110 4111 /* set values of ba */ 4112 m = merge->rowmap->n; 4113 for (i=0; i<m; i++) { 4114 arow = owners[rank] + i; 4115 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4116 bnzi = bi[i+1] - bi[i]; 4117 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4118 4119 /* add local non-zero vals of this proc's seqmat into ba */ 4120 anzi = ai[arow+1] - ai[arow]; 4121 aj = a->j + ai[arow]; 4122 aa = a->a + ai[arow]; 4123 nextaj = 0; 4124 for (j=0; nextaj<anzi; j++){ 4125 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4126 ba_i[j] += aa[nextaj++]; 4127 } 4128 } 4129 4130 /* add received vals into ba */ 4131 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4132 /* i-th row */ 4133 if (i == *nextrow[k]) { 4134 anzi = *(nextai[k]+1) - *nextai[k]; 4135 aj = buf_rj[k] + *(nextai[k]); 4136 aa = abuf_r[k] + *(nextai[k]); 4137 nextaj = 0; 4138 for (j=0; nextaj<anzi; j++){ 4139 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4140 ba_i[j] += aa[nextaj++]; 4141 } 4142 } 4143 nextrow[k]++; nextai[k]++; 4144 } 4145 } 4146 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4147 } 4148 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 4151 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4152 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4153 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4154 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4155 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4156 PetscFunctionReturn(0); 4157 } 4158 4159 #undef __FUNCT__ 4160 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4161 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4162 { 4163 PetscErrorCode ierr; 4164 Mat B_mpi; 4165 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4166 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4167 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4168 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4169 PetscInt len,proc,*dnz,*onz; 4170 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4171 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4172 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4173 MPI_Status *status; 4174 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4175 PetscBT lnkbt; 4176 Mat_Merge_SeqsToMPI *merge; 4177 PetscContainer container; 4178 4179 PetscFunctionBegin; 4180 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4181 4182 /* make sure it is a PETSc comm */ 4183 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4184 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4185 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4186 4187 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4188 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4189 4190 /* determine row ownership */ 4191 /*---------------------------------------------------------*/ 4192 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4193 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4194 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4195 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4196 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4197 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4198 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4199 4200 m = merge->rowmap->n; 4201 M = merge->rowmap->N; 4202 owners = merge->rowmap->range; 4203 4204 /* determine the number of messages to send, their lengths */ 4205 /*---------------------------------------------------------*/ 4206 len_s = merge->len_s; 4207 4208 len = 0; /* length of buf_si[] */ 4209 merge->nsend = 0; 4210 for (proc=0; proc<size; proc++){ 4211 len_si[proc] = 0; 4212 if (proc == rank){ 4213 len_s[proc] = 0; 4214 } else { 4215 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4216 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4217 } 4218 if (len_s[proc]) { 4219 merge->nsend++; 4220 nrows = 0; 4221 for (i=owners[proc]; i<owners[proc+1]; i++){ 4222 if (ai[i+1] > ai[i]) nrows++; 4223 } 4224 len_si[proc] = 2*(nrows+1); 4225 len += len_si[proc]; 4226 } 4227 } 4228 4229 /* determine the number and length of messages to receive for ij-structure */ 4230 /*-------------------------------------------------------------------------*/ 4231 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4232 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4233 4234 /* post the Irecv of j-structure */ 4235 /*-------------------------------*/ 4236 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4237 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4238 4239 /* post the Isend of j-structure */ 4240 /*--------------------------------*/ 4241 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4242 4243 for (proc=0, k=0; proc<size; proc++){ 4244 if (!len_s[proc]) continue; 4245 i = owners[proc]; 4246 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4247 k++; 4248 } 4249 4250 /* receives and sends of j-structure are complete */ 4251 /*------------------------------------------------*/ 4252 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4253 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4254 4255 /* send and recv i-structure */ 4256 /*---------------------------*/ 4257 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4258 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4259 4260 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4261 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4262 for (proc=0,k=0; proc<size; proc++){ 4263 if (!len_s[proc]) continue; 4264 /* form outgoing message for i-structure: 4265 buf_si[0]: nrows to be sent 4266 [1:nrows]: row index (global) 4267 [nrows+1:2*nrows+1]: i-structure index 4268 */ 4269 /*-------------------------------------------*/ 4270 nrows = len_si[proc]/2 - 1; 4271 buf_si_i = buf_si + nrows+1; 4272 buf_si[0] = nrows; 4273 buf_si_i[0] = 0; 4274 nrows = 0; 4275 for (i=owners[proc]; i<owners[proc+1]; i++){ 4276 anzi = ai[i+1] - ai[i]; 4277 if (anzi) { 4278 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4279 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4280 nrows++; 4281 } 4282 } 4283 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4284 k++; 4285 buf_si += len_si[proc]; 4286 } 4287 4288 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4289 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4290 4291 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4292 for (i=0; i<merge->nrecv; i++){ 4293 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4294 } 4295 4296 ierr = PetscFree(len_si);CHKERRQ(ierr); 4297 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4298 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4299 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4385 4386 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4387 B_mpi->assembled = PETSC_FALSE; 4388 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4389 merge->bi = bi; 4390 merge->bj = bj; 4391 merge->buf_ri = buf_ri; 4392 merge->buf_rj = buf_rj; 4393 merge->coi = PETSC_NULL; 4394 merge->coj = PETSC_NULL; 4395 merge->owners_co = PETSC_NULL; 4396 4397 /* attach the supporting struct to B_mpi for reuse */ 4398 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4399 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4400 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4401 *mpimat = B_mpi; 4402 4403 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4404 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4405 PetscFunctionReturn(0); 4406 } 4407 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatMerge_SeqsToMPI" 4410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4411 { 4412 PetscErrorCode ierr; 4413 4414 PetscFunctionBegin; 4415 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4416 if (scall == MAT_INITIAL_MATRIX){ 4417 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4418 } 4419 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4420 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4421 PetscFunctionReturn(0); 4422 } 4423 4424 #undef __FUNCT__ 4425 #define __FUNCT__ "MatGetLocalMat" 4426 /*@ 4427 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4428 4429 Not Collective 4430 4431 Input Parameters: 4432 + A - the matrix 4433 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4434 4435 Output Parameter: 4436 . 
A_loc - the local sequential matrix generated 4437 4438 Level: developer 4439 4440 @*/ 4441 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4442 { 4443 PetscErrorCode ierr; 4444 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4445 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4446 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4447 MatScalar *aa=a->a,*ba=b->a,*cam; 4448 PetscScalar *ca; 4449 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4450 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4451 4452 PetscFunctionBegin; 4453 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4454 if (scall == MAT_INITIAL_MATRIX){ 4455 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4456 ci[0] = 0; 4457 for (i=0; i<am; i++){ 4458 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4459 } 4460 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4461 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4462 k = 0; 4463 for (i=0; i<am; i++) { 4464 ncols_o = bi[i+1] - bi[i]; 4465 ncols_d = ai[i+1] - ai[i]; 4466 /* off-diagonal portion of A */ 4467 for (jo=0; jo<ncols_o; jo++) { 4468 col = cmap[*bj]; 4469 if (col >= cstart) break; 4470 cj[k] = col; bj++; 4471 ca[k++] = *ba++; 4472 } 4473 /* diagonal portion of A */ 4474 for (j=0; j<ncols_d; j++) { 4475 cj[k] = cstart + *aj++; 4476 ca[k++] = *aa++; 4477 } 4478 /* off-diagonal portion of A */ 4479 for (j=jo; j<ncols_o; j++) { 4480 cj[k] = cmap[*bj++]; 4481 ca[k++] = *ba++; 4482 } 4483 } 4484 /* put together the new matrix */ 4485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4486 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4487 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4488 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4489 mat->free_a = PETSC_TRUE; 4490 mat->free_ij = PETSC_TRUE; 4491 mat->nonew = 0; 4492 } else if (scall == MAT_REUSE_MATRIX){ 4493 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4494 ci = mat->i; cj = mat->j; cam = mat->a; 4495 for (i=0; i<am; i++) { 4496 /* off-diagonal portion of A */ 4497 ncols_o = bi[i+1] - bi[i]; 4498 for (jo=0; jo<ncols_o; jo++) { 4499 col = cmap[*bj]; 4500 if (col >= cstart) break; 4501 *cam++ = *ba++; bj++; 4502 } 4503 /* diagonal portion of A */ 4504 ncols_d = ai[i+1] - ai[i]; 4505 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4506 /* off-diagonal portion of A */ 4507 for (j=jo; j<ncols_o; j++) { 4508 *cam++ = *ba++; bj++; 4509 } 4510 } 4511 } else { 4512 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4513 } 4514 4515 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 4519 #undef __FUNCT__ 4520 #define __FUNCT__ "MatGetLocalMatCondensed" 4521 /*@C 4522 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4523 4524 Not Collective 4525 4526 Input Parameters: 4527 + A - the matrix 4528 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4529 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4530 4531 Output Parameter: 4532 . 
A_loc - the local sequential matrix generated 4533 4534 Level: developer 4535 4536 @*/ 4537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4538 { 4539 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4540 PetscErrorCode ierr; 4541 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4542 IS isrowa,iscola; 4543 Mat *aloc; 4544 4545 PetscFunctionBegin; 4546 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4547 if (!row){ 4548 start = A->rmap->rstart; end = A->rmap->rend; 4549 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4550 } else { 4551 isrowa = *row; 4552 } 4553 if (!col){ 4554 start = A->cmap->rstart; 4555 cmap = a->garray; 4556 nzA = a->A->cmap->n; 4557 nzB = a->B->cmap->n; 4558 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4559 ncols = 0; 4560 for (i=0; i<nzB; i++) { 4561 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4562 else break; 4563 } 4564 imark = i; 4565 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4566 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4567 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4568 ierr = PetscFree(idx);CHKERRQ(ierr); 4569 } else { 4570 iscola = *col; 4571 } 4572 if (scall != MAT_INITIAL_MATRIX){ 4573 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4574 aloc[0] = *A_loc; 4575 } 4576 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4577 *A_loc = aloc[0]; 4578 ierr = PetscFree(aloc);CHKERRQ(ierr); 4579 if (!row){ 4580 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4581 } 4582 if (!col){ 4583 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4584 } 4585 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #undef __FUNCT__ 4590 #define __FUNCT__ "MatGetBrowsOfAcols" 4591 /*@C 4592 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 4593 4594 Collective on Mat 4595 4596 Input Parameters: 4597 + A,B - the matrices in mpiaij format 4598 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4599 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4600 4601 Output Parameter: 4602 + rowb, colb - index sets of rows and columns of B to extract 4603 .
brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows 4604 - B_seq - the sequential matrix generated 4605 4606 Level: developer 4607 4608 @*/ 4609 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4610 { 4611 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4612 PetscErrorCode ierr; 4613 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4614 IS isrowb,iscolb; 4615 Mat *bseq; 4616 4617 PetscFunctionBegin; 4618 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4619 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4620 } 4621 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4622 4623 if (scall == MAT_INITIAL_MATRIX){ 4624 start = A->cmap->rstart; 4625 cmap = a->garray; 4626 nzA = a->A->cmap->n; 4627 nzB = a->B->cmap->n; 4628 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4629 ncols = 0; 4630 for (i=0; i<nzB; i++) { /* row < local row index */ 4631 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4632 else break; 4633 } 4634 imark = i; 4635 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4636 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4637 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4638 ierr = PetscFree(idx);CHKERRQ(ierr); 4639 *brstart = imark; 4640 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4641 } else { 4642 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4643 isrowb = *rowb; iscolb = *colb; 4644 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4645 bseq[0] = *B_seq; 4646 } 4647 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4648 *B_seq = bseq[0]; 4649 ierr = PetscFree(bseq);CHKERRQ(ierr); 4650 if (!rowb){ 4651 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4652 } else { 4653 *rowb = isrowb; 4654 } 4655 if (!colb){ 4656 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4657 } else { 4658 *colb = iscolb; 4659 } 4660 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4661 PetscFunctionReturn(0); 4662 } 4663 4664 #undef __FUNCT__ 4665 #define __FUNCT__ "MatGetBrowsOfAoCols" 4666 /*@C 4667 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4668 of the OFF-DIAGONAL portion of local A 4669 4670 Collective on Mat 4671 4672 Input Parameters: 4673 + A,B - the matrices in mpiaij format 4674 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4675 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4676 .
startsj_r - similar to startsj for receives 4677 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4678 4679 Output Parameter: 4680 + B_oth - the sequential matrix generated 4681 4682 Level: developer 4683 4684 @*/ 4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4686 { 4687 VecScatter_MPI_General *gen_to,*gen_from; 4688 PetscErrorCode ierr; 4689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4690 Mat_SeqAIJ *b_oth; 4691 VecScatter ctx=a->Mvctx; 4692 MPI_Comm comm=((PetscObject)ctx)->comm; 4693 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4694 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4695 PetscScalar *rvalues,*svalues; 4696 MatScalar *b_otha,*bufa,*bufA; 4697 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4698 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4699 MPI_Status *sstatus,rstatus; 4700 PetscMPIInt jj; 4701 PetscInt *cols,sbs,rbs; 4702 PetscScalar *vals; 4703 4704 PetscFunctionBegin; 4705 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4706 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4707 } 4708 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4710 4711 gen_to = (VecScatter_MPI_General*)ctx->todata; 4712 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4713 rvalues = gen_from->values; /* holds the length of receiving row */ 4714 svalues = gen_to->values; /* holds the length of sending row */ 4715 nrecvs = gen_from->n; 4716 nsends = gen_to->n; 4717 4718 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4719 srow = gen_to->indices; /* local row index to be sent */ 4720 sstarts = gen_to->starts; 4721 sprocs = gen_to->procs; 4722 sstatus = gen_to->sstatus; 4723 sbs = gen_to->bs; 4724 rstarts = gen_from->starts; 4725 rprocs = gen_from->procs; 4726 rbs = gen_from->bs; 4727 4728 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4729 if (scall == MAT_INITIAL_MATRIX){ 4730 /* i-array */ 4731 /*---------*/ 4732 /* post receives */ 4733 for (i=0; i<nrecvs; i++){ 4734 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4735 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4736 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4737 } 4738 4739 /* pack the outgoing message */ 4740 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4741 sstartsj[0] = 0; rstartsj[0] = 0; 4742 len = 0; /* total length of j or a array to be sent */ 4743 k = 0; 4744 for (i=0; i<nsends; i++){ 4745 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4746 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4747 for (j=0; j<nrows; j++) { 4748 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4749 for (l=0; l<sbs; l++){ 4750 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4751 rowlen[j*sbs+l] = ncols; 4752 len += ncols; 4753 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4754 } 4755 k++; 4756 } 4757 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4758 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj 
and bufa */ 4759 } 4760 /* recvs and sends of i-array are completed */ 4761 i = nrecvs; 4762 while (i--) { 4763 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4764 } 4765 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4766 4767 /* allocate buffers for sending j and a arrays */ 4768 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4769 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4770 4771 /* create i-array of B_oth */ 4772 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4773 b_othi[0] = 0; 4774 len = 0; /* total length of j or a array to be received */ 4775 k = 0; 4776 for (i=0; i<nrecvs; i++){ 4777 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4778 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4779 for (j=0; j<nrows; j++) { 4780 b_othi[k+1] = b_othi[k] + rowlen[j]; 4781 len += rowlen[j]; k++; 4782 } 4783 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4784 } 4785 4786 /* allocate space for the j and a arrays of B_oth */ 4787 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4788 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4789 4790 /* j-array */ 4791 /*---------*/ 4792 /* post receives of j-array */ 4793 for (i=0; i<nrecvs; i++){ 4794 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4795 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4796 } 4797 4798 /* pack the outgoing message j-array */ 4799 k = 0; 4800 for (i=0; i<nsends; i++){ 4801 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4802 bufJ = bufj+sstartsj[i]; 4803 for (j=0; j<nrows; j++) { 4804 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4805 for (ll=0; ll<sbs; ll++){ 4806 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4807 for (l=0; l<ncols; l++){ 4808 *bufJ++ = cols[l]; 4809 } 4810 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4811 } 4812 } 4813 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4814 } 4815 4816 /* recvs and sends of j-array are completed */ 4817 i = nrecvs; 4818 while (i--) { 4819 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4820 } 4821 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4822 } else if (scall == MAT_REUSE_MATRIX){ 4823 sstartsj = *startsj; 4824 rstartsj = *startsj_r; 4825 bufa = *bufa_ptr; 4826 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4827 b_otha = b_oth->a; 4828 } else { 4829 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4830 } 4831 4832 /* a-array */ 4833 /*---------*/ 4834 /* post receives of a-array */ 4835 for (i=0; i<nrecvs; i++){ 4836 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4837 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4838 } 4839 4840 /* pack the outgoing message a-array */ 4841 k = 0; 4842 for (i=0; i<nsends; i++){ 4843 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4844 bufA = bufa+sstartsj[i]; 4845 for (j=0; j<nrows; j++) { 4846 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4847 for (ll=0; ll<sbs; ll++){ 4848 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4849 for (l=0; l<ncols; l++){ 4850 *bufA++ = vals[l]; 4851 } 4852 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4853 } 4854 } 4855 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4856 } 4857 /* recvs and sends of a-array are completed */ 4858 i = nrecvs; 4859 while (i--) { 4860 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4861 } 4862 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4863 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4864 4865 if (scall == MAT_INITIAL_MATRIX){ 4866 /* put together the new matrix */ 4867 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4868 4869 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4870 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4871 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4872 b_oth->free_a = PETSC_TRUE; 4873 b_oth->free_ij = PETSC_TRUE; 4874 b_oth->nonew = 0; 4875 4876 ierr = PetscFree(bufj);CHKERRQ(ierr); 4877 if (!startsj || !bufa_ptr){ 4878 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4879 ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not saved for reuse, so release it here */ 4880 } else { 4881 *startsj = sstartsj; 4882 *startsj_r = rstartsj; 4883 *bufa_ptr = bufa; 4884 } 4885 } 4886 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatGetCommunicationStructs" 4892 /*@C 4893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4894 4895 Not Collective 4896 4897 Input Parameters: 4898 . A - The matrix in mpiaij format 4899 4900 Output Parameter: 4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4902 .
colmap - A map from global column index to local index into lvec 4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4904 4905 Level: developer 4906 4907 @*/ 4908 #if defined (PETSC_USE_CTABLE) 4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4910 #else 4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4912 #endif 4913 { 4914 Mat_MPIAIJ *a; 4915 4916 PetscFunctionBegin; 4917 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4918 PetscValidPointer(lvec, 2) 4919 PetscValidPointer(colmap, 3) 4920 PetscValidPointer(multScatter, 4) 4921 a = (Mat_MPIAIJ *) A->data; 4922 if (lvec) *lvec = a->lvec; 4923 if (colmap) *colmap = a->colmap; 4924 if (multScatter) *multScatter = a->Mvctx; 4925 PetscFunctionReturn(0); 4926 } 4927 4928 EXTERN_C_BEGIN 4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4932 EXTERN_C_END 4933 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4936 /* 4937 Computes C = A*B as (B'*A')' since computing A*B directly is untenable 4938 4939 n p p 4940 ( ) ( ) ( ) 4941 m ( A ) * n ( B ) = m ( C ) 4942 ( ) ( ) ( ) 4943 4944 */ 4945 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4946 { 4947 PetscErrorCode ierr; 4948 Mat At,Bt,Ct; 4949 4950 PetscFunctionBegin; 4951 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4952 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4953 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4954 ierr = MatDestroy(At);CHKERRQ(ierr); 4955 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4956 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4957 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 #undef __FUNCT__ 4962 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4963 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4964 { 4965 PetscErrorCode ierr; 4966 PetscInt m=A->rmap->n,n=B->cmap->n; 4967 Mat Cmat; 4968 4969 PetscFunctionBegin; 4970 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4971 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4972 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4973 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4974 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4975 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4976 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4977 *C = Cmat; 4978 PetscFunctionReturn(0); 4979 } 4980 4981 /* ----------------------------------------------------------------*/ 4982 #undef __FUNCT__ 4983 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4984 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4985 { 4986 PetscErrorCode ierr; 4987 4988 PetscFunctionBegin; 4989 if (scall == MAT_INITIAL_MATRIX){ 4990 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4991 } 4992 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4993 PetscFunctionReturn(0); 4994 } 4995 4996 EXTERN_C_BEGIN
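/*
   The declarations below pull in factorization routines from the optional external
   packages (MUMPS, PaStiX, SuperLU_DIST, Spooles); MatCreate_MPIAIJ() further down
   composes them onto each MATMPIAIJ object so that MatGetFactor() can locate them.
   A minimal usage sketch, illustrative only: it assumes PETSc was configured with
   the chosen package and that A, b, x, isrow, and iscol are set up by the caller.

      Mat           F;
      MatFactorInfo info;

      ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
      ierr = MatGetFactor(A,MAT_SOLVER_MUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
      ierr = MatLUFactorSymbolic(F,A,isrow,iscol,&info);CHKERRQ(ierr);
      ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
      ierr = MatSolve(F,b,x);CHKERRQ(ierr);
      ierr = MatDestroy(F);CHKERRQ(ierr);

   Most applications reach the same code path through KSP, typically with the
   runtime options -pc_type lu -pc_factor_mat_solver_package mumps.
*/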
4997 #if defined(PETSC_HAVE_MUMPS) 4998 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4999 #endif 5000 #if defined(PETSC_HAVE_PASTIX) 5001 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5002 #endif 5003 #if defined(PETSC_HAVE_SUPERLU_DIST) 5004 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5005 #endif 5006 #if defined(PETSC_HAVE_SPOOLES) 5007 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5008 #endif 5009 EXTERN_C_END 5010 5011 /*MC 5012 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5013 5014 Options Database Keys: 5015 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5016 5017 Level: beginner 5018 5019 .seealso: MatCreateMPIAIJ() 5020 M*/ 5021 5022 EXTERN_C_BEGIN 5023 #undef __FUNCT__ 5024 #define __FUNCT__ "MatCreate_MPIAIJ" 5025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5026 { 5027 Mat_MPIAIJ *b; 5028 PetscErrorCode ierr; 5029 PetscMPIInt size; 5030 5031 PetscFunctionBegin; 5032 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5033 5034 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5035 B->data = (void*)b; 5036 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5037 B->rmap->bs = 1; 5038 B->assembled = PETSC_FALSE; 5039 B->mapping = 0; 5040 5041 B->insertmode = NOT_SET_VALUES; 5042 b->size = size; 5043 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5044 5045 /* build cache for off array entries formed */ 5046 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5047 b->donotstash = PETSC_FALSE; 5048 b->colmap = 0; 5049 b->garray = 0; 5050 b->roworiented = PETSC_TRUE; 5051 5052 /* stuff used for matrix vector multiply */ 5053 b->lvec = PETSC_NULL; 5054 b->Mvctx = PETSC_NULL; 5055 5056 /* stuff for MatGetRow() */ 5057 b->rowindices = 0; 5058 b->rowvalues = 0; 5059 b->getrowactive = PETSC_FALSE; 5060 5061 #if defined(PETSC_HAVE_SPOOLES) 5062 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5063 "MatGetFactor_mpiaij_spooles", 5064 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5065 #endif 5066 #if defined(PETSC_HAVE_MUMPS) 5067 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5068 "MatGetFactor_mpiaij_mumps", 5069 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5070 #endif 5071 #if defined(PETSC_HAVE_PASTIX) 5072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5073 "MatGetFactor_mpiaij_pastix", 5074 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5075 #endif 5076 #if defined(PETSC_HAVE_SUPERLU_DIST) 5077 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5078 "MatGetFactor_mpiaij_superlu_dist", 5079 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5080 #endif 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5082 "MatStoreValues_MPIAIJ", 5083 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5085 "MatRetrieveValues_MPIAIJ", 5086 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5088 "MatGetDiagonalBlock_MPIAIJ", 5089 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5091 "MatIsTranspose_MPIAIJ", 5092 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5094 "MatMPIAIJSetPreallocation_MPIAIJ", 5095 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5097 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5098 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5100 "MatDiagonalScaleLocal_MPIAIJ", 5101 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5103 "MatConvert_MPIAIJ_MPICSRPERM", 5104 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5106 "MatConvert_MPIAIJ_MPICRL", 5107 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5109 "MatConvert_MPIAIJ_MPISBAIJ", 5110 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5112 "MatMatMult_MPIDense_MPIAIJ", 5113 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5115 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5116 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5118 "MatMatMultNumeric_MPIDense_MPIAIJ", 5119 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5120 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5121 PetscFunctionReturn(0); 5122 } 5123 EXTERN_C_END 5124 5125 #undef __FUNCT__ 5126 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5127 /*@ 5128 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5129 and "off-diagonal" part of the matrix in CSR format. 5130 5131 Collective on MPI_Comm 5132 5133 Input Parameters: 5134 + comm - MPI communicator 5135 . m - number of local rows (Cannot be PETSC_DECIDE) 5136 . n - This value should be the same as the local size used in creating the 5137 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5138 calculated if N is given) For square matrices n is almost always m. 5139 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5140 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5141 . i - row indices for "diagonal" portion of matrix 5142 . j - column indices 5143 . a - matrix values 5144 . oi - row indices for "off-diagonal" portion of matrix 5145 . oj - column indices 5146 - oa - matrix values 5147 5148 Output Parameter: 5149 . mat - the matrix 5150 5151 Level: advanced 5152 5153 Notes: 5154 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5155 5156 The i and j indices are 0 based 5157 5158 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5159 5160 This sets local rows and cannot be used to set off-processor values. 5161 5162 You cannot later use MatSetValues() to change values in this matrix. 
5163 5164 .keywords: matrix, aij, compressed row, sparse, parallel 5165 5166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5167 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5168 @*/ 5169 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5170 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5171 { 5172 PetscErrorCode ierr; 5173 Mat_MPIAIJ *maij; 5174 5175 PetscFunctionBegin; 5176 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5177 if (i[0]) { 5178 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5179 } 5180 if (oi[0]) { 5181 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5182 } 5183 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5184 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5185 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5186 maij = (Mat_MPIAIJ*) (*mat)->data; 5187 maij->donotstash = PETSC_TRUE; 5188 (*mat)->preallocated = PETSC_TRUE; 5189 5190 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5191 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5192 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5193 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5194 5195 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5196 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5197 5198 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5199 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5200 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5201 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5202 5203 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5204 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 PetscFunctionReturn(0); 5206 } 5207 5208 /* 5209 Special version for direct calls from Fortran 5210 */ 5211 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5212 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5213 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5214 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5215 #endif 5216 5217 /* Change these macros so can be used in void function */ 5218 #undef CHKERRQ 5219 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5220 #undef SETERRQ2 5221 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5222 #undef SETERRQ 5223 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5224 5225 EXTERN_C_BEGIN 5226 #undef __FUNCT__ 5227 #define __FUNCT__ "matsetvaluesmpiaij_" 5228 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5229 { 5230 Mat mat = *mmat; 5231 PetscInt m = *mm, n = *mn; 5232 InsertMode addv = *maddv; 5233 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5234 PetscScalar value; 5235 PetscErrorCode ierr; 5236 5237 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5238 if (mat->insertmode == NOT_SET_VALUES) { 5239 mat->insertmode = addv; 5240 } 5241 #if defined(PETSC_USE_DEBUG) 5242 else if (mat->insertmode != addv) { 5243 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5244 } 5245 #endif 5246 { 5247 PetscInt i,j,rstart = mat->rmap->rstart,rend = 
mat->rmap->rend; 5248 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5249 PetscTruth roworiented = aij->roworiented; 5250 5251 /* Some Variables required in the macro */ 5252 Mat A = aij->A; 5253 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5254 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5255 MatScalar *aa = a->a; 5256 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5257 Mat B = aij->B; 5258 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5259 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5260 MatScalar *ba = b->a; 5261 5262 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5263 PetscInt nonew = a->nonew; 5264 MatScalar *ap1,*ap2; 5265 5266 PetscFunctionBegin; 5267 for (i=0; i<m; i++) { 5268 if (im[i] < 0) continue; 5269 #if defined(PETSC_USE_DEBUG) 5270 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5271 #endif 5272 if (im[i] >= rstart && im[i] < rend) { 5273 row = im[i] - rstart; 5274 lastcol1 = -1; 5275 rp1 = aj + ai[row]; 5276 ap1 = aa + ai[row]; 5277 rmax1 = aimax[row]; 5278 nrow1 = ailen[row]; 5279 low1 = 0; 5280 high1 = nrow1; 5281 lastcol2 = -1; 5282 rp2 = bj + bi[row]; 5283 ap2 = ba + bi[row]; 5284 rmax2 = bimax[row]; 5285 nrow2 = bilen[row]; 5286 low2 = 0; 5287 high2 = nrow2; 5288 5289 for (j=0; j<n; j++) { 5290 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5291 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5292 if (in[j] >= cstart && in[j] < cend){ 5293 col = in[j] - cstart; 5294 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5295 } else if (in[j] < 0) continue; 5296 #if defined(PETSC_USE_DEBUG) 5297 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5298 #endif 5299 else { 5300 if (mat->was_assembled) { 5301 if (!aij->colmap) { 5302 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5303 } 5304 #if defined (PETSC_USE_CTABLE) 5305 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5306 col--; 5307 #else 5308 col = aij->colmap[in[j]] - 1; 5309 #endif 5310 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5311 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5312 col = in[j]; 5313 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5314 B = aij->B; 5315 b = (Mat_SeqAIJ*)B->data; 5316 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5317 rp2 = bj + bi[row]; 5318 ap2 = ba + bi[row]; 5319 rmax2 = bimax[row]; 5320 nrow2 = bilen[row]; 5321 low2 = 0; 5322 high2 = nrow2; 5323 bm = aij->B->rmap->n; 5324 ba = b->a; 5325 } 5326 } else col = in[j]; 5327 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5328 } 5329 } 5330 } else { 5331 if (!aij->donotstash) { 5332 if (roworiented) { 5333 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5334 } else { 5335 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5336 } 5337 } 5338 } 5339 }} 5340 PetscFunctionReturnVoid(); 5341 } 5342 EXTERN_C_END 5343 5344
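/*
   A minimal sketch of how the MATMPIAIJ routines defined in this file are normally
   driven through the generic Mat interface; the communicator, sizes, preallocation
   counts, and inserted values below are hypothetical placeholders, not taken from
   any particular application.

      Mat            A;
      PetscInt       rstart,rend,row;
      PetscScalar    one = 1.0;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/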