#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
   Distributes a SeqAIJ matrix (held on process 0) across a set of processes.
   Code stolen from MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a
   routine for each matrix type.

   Collective on comm.

   Input:
     comm  - communicator the distributed matrix lives on
     gmat  - on rank 0, the global SeqAIJ matrix holding all values
     m     - number of local rows this process will own
     reuse - MAT_INITIAL_MATRIX builds *inmat from scratch;
             MAT_REUSE_MATRIX only moves new numerical values from rank 0
             into the already-distributed *inmat (structure unchanged)
   Output:
     inmat - the distributed MPIAIJ matrix

   Only for square matrices.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 actually reads gmat, so only it checks the type */
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* build the row-ownership ranges: rowners[p] = first global row of rank p */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal entries per local row;
         ld[i] counts entries strictly LEFT of the diagonal block (col < rstart),
         cached on the Mat for the MAT_REUSE_MATRIX path below */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 keeps its own slice in place (its rows start at offset 0) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* NOTE(review): gmataa is MatScalar* but allocated/zeroed via PetscScalar
         sizes — fine when the two types coincide; confirm for mixed-precision builds */
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal entries per local row (see above) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-block count */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths for the insertion loop */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash ld so a later MAT_REUSE_MATRIX call can split values without indices */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat.
       The received row is stored as [left-of-diag | diag block | right-of-diag];
       ld[] (computed above) gives the left-of-diag count for each row. */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      /* first row handled separately: copy its left part and its diagonal block */
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* "right part of row i-1" plus "left part of row i" are contiguous in the stream */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right part of the final row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      /* gmataa was advanced during the copies; free via the saved base pointer */
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order-N integer array) but is fast to access.
 */
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table: global column -> local off-diagonal column, stored shifted by +1
     so that 0 can mean "not present" */
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length cmap->N; entry 0 means "not present", otherwise local+1 */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE   15

/*
   Insert (or add) one value into the diagonal (A) block of an MPIAIJ matrix.
   Expects the caller to have set up rp1/ap1/nrow1/rmax1/low1/high1/lastcol1
   for the current row; uses a bounded binary search plus linear scan and
   falls through to reallocation (MatSeqXAIJReallocateAIJ) when the entry
   is new.  Uses the a_noinsert label, so at most one use per scope.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value; \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value; \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal (B) block;
   works on the rp2/ap2/nrow2/... state and the b_noinsert label.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
      for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value; \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
      if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
      N = nrow2++ - 1; b->nz++; high2++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col; \
      ap2[_i] = value; \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Replace all stored values of one locally-owned row.  v must hold the row's
   values ordered by global column: [left of diagonal block | diagonal block |
   right of diagonal block].  Code only works for square matrices.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues for MPIAIJ: locally-owned rows are inserted directly into the
   diagonal (A) or off-diagonal (B) sequential blocks via the macros above;
   rows owned by other processes are queued in the stash and communicated
   during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up macro state for both the A and B block */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v == NULL means "insert explicit zeros" */
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          /* off-diagonal block; after first assembly columns must be translated
             through colmap, and a brand-new column forces disassembly */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row owned elsewhere: stash for communication during assembly */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode
MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  /* Retrieve values from locally-owned rows only; the diagonal block is read
     from aij->A, anything else from aij->B via the colmap translation.
     Entries not stored locally come back as 0.0. */
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal-block column */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in the off-diagonal block -> value is 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Begin assembly: agree on the insert mode across the communicator and start
   scattering stashed off-process values to their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   End assembly: drain the stash into local storage, assemble the A and B
   blocks, handle collective disassembly, and build the multiply machinery
   on first final assembly.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter/garray machinery for MatMult */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zero every stored value in both the diagonal and off-diagonal blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zero the given global rows (each listed row may be owned by any process)
   and optionally place diag on the diagonal.  Row indices are first routed
   to their owning processes with a hand-rolled Irecv/Isend exchange, then
   each process zeros its own rows.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  /* NOTE(review): other debug guards in this file use PETSC_USE_DEBUG — confirm
     whether PETSC_DEBUG is intended here */
  PetscTruth found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor;
     nprocs[2*p] = #rows going to p, nprocs[2*p+1] = 1 iff any row goes to p */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the ownership search only when the input is not ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* starts[] was destroyed by the packing loop above; rebuild it */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter; lrows holds LOCAL row numbers */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: the diagonal entry may live anywhere, so it
       must be inserted with MatSetValues (requires new-nonzero insertion) */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   y = A*x: overlap the scatter of ghost values with the diagonal-block
   multiply, then add the off-diagonal contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  /* start gathering ghost values of xx while the local multiply proceeds */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* Apply only the local diagonal block: xx = diag(A)^{-1}-style block action on bb. */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* z = y + A*x, with the same communication/computation overlap as MatMult. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   y = A^T*x: the off-diagonal transpose contribution is computed into lvec
   and scattered back (reverse scatter with ADD_VALUES).  The order of the
   local multiply relative to Begin/End depends on whether the scatter is
   "merged" (see VecScatterGetMerged()).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Test whether Bmat equals Amat^T (up to tol).  First compares the diagonal
   blocks locally; if those match, extracts the off-diagonal parts with
   MatGetSubMatrices() and compares them.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix.
*/ 813 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 814 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 815 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 816 for (i=0; i<first; i++) notme[i] = i; 817 for (i=last; i<M; i++) notme[i-last+first] = i; 818 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 819 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 820 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 821 Aoff = Aoffs[0]; 822 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 823 Boff = Boffs[0]; 824 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 825 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 826 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 827 ierr = ISDestroy(Me);CHKERRQ(ierr); 828 ierr = ISDestroy(Notme);CHKERRQ(ierr); 829 830 PetscFunctionReturn(0); 831 } 832 EXTERN_C_END 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 837 { 838 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 /* do nondiagonal part */ 843 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 844 /* send it on its way */ 845 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 846 /* do local part */ 847 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 848 /* receive remote parts */ 849 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 /* 854 This only works correctly for square matrices where the subblock A->A is the 855 diagonal block 856 */ 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 860 { 861 PetscErrorCode ierr; 862 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* valid only when the row and column layouts coincide, so a->A really holds the diagonal */
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* Scales both the diagonal (a->A) and off-diagonal (a->B) sequential blocks by aa */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Frees all storage owned by the MPIAIJ implementation (stash, sub-blocks,
   column map, scatter context, work arrays) and then unregisters the composed
   type-specific functions so the Mat reverts to an untyped shell.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* detach the type name and the dynamically composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   Writes the parallel matrix to a binary viewer: rank 0 writes the header and
   then receives and writes each process's row lengths, column indices, and
   values in rank order.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*    A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*    B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    /* header[3] = global nonzero count, summed across all ranks */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr =
PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 950 /* get largest number of rows any processor has */ 951 rlen = mat->rmap->n; 952 range = mat->rmap->range; 953 for (i=1; i<size; i++) { 954 rlen = PetscMax(rlen,range[i+1] - range[i]); 955 } 956 } else { 957 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 958 rlen = mat->rmap->n; 959 } 960 961 /* load up the local row counts */ 962 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 963 for (i=0; i<mat->rmap->n; i++) { 964 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 965 } 966 967 /* store the row lengths to the file */ 968 if (!rank) { 969 MPI_Status status; 970 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 971 for (i=1; i<size; i++) { 972 rlen = range[i+1] - range[i]; 973 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 974 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 975 } 976 } else { 977 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 980 981 /* load up the local column indices */ 982 nzmax = nz; /* )th processor needs space a largest processor needs */ 983 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 984 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 985 cnt = 0; 986 for (i=0; i<mat->rmap->n; i++) { 987 for (j=B->i[i]; j<B->i[i+1]; j++) { 988 if ( (col = garray[B->j[j]]) > cstart) break; 989 column_indices[cnt++] = col; 990 } 991 for (k=A->i[i]; k<A->i[i+1]; k++) { 992 column_indices[cnt++] = A->j[k] + cstart; 993 } 994 for (; j<B->i[i+1]; j++) { 995 column_indices[cnt++] = garray[B->j[j]]; 996 } 997 } 998 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 999 
1000 /* store the column indices to the file */ 1001 if (!rank) { 1002 MPI_Status status; 1003 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1004 for (i=1; i<size; i++) { 1005 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1006 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1007 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1008 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1009 } 1010 } else { 1011 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1012 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1013 } 1014 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1015 1016 /* load up the local column values */ 1017 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1018 cnt = 0; 1019 for (i=0; i<mat->rmap->n; i++) { 1020 for (j=B->i[i]; j<B->i[i+1]; j++) { 1021 if ( garray[B->j[j]] > cstart) break; 1022 column_values[cnt++] = B->a[j]; 1023 } 1024 for (k=A->i[i]; k<A->i[i+1]; k++) { 1025 column_values[cnt++] = A->a[k]; 1026 } 1027 for (; j<B->i[i+1]; j++) { 1028 column_values[cnt++] = B->a[j]; 1029 } 1030 } 1031 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1032 1033 /* store the column values to the file */ 1034 if (!rank) { 1035 MPI_Status status; 1036 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1037 for (i=1; i<size; i++) { 1038 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1039 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1040 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1041 ierr = 
PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1042 } 1043 } else { 1044 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1045 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1046 } 1047 ierr = PetscFree(column_values);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1054 { 1055 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1056 PetscErrorCode ierr; 1057 PetscMPIInt rank = aij->rank,size = aij->size; 1058 PetscTruth isdraw,iascii,isbinary; 1059 PetscViewer sviewer; 1060 PetscViewerFormat format; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1066 if (iascii) { 1067 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1068 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1069 MatInfo info; 1070 PetscTruth inodes; 1071 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1074 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1075 if (!inodes) { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } else { 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1080 
rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor: the diagonal block IS the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  /* fall through for full ASCII dumps and draw: gather everything to rank 0 */
  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part: temporarily shift local column indices to global */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* restore the local column indices modified above */
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part: map compressed column indices through garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* Dispatches to the ASCII/draw/socket/binary implementation; rejects other viewer types */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode
MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  /* bb1 (modified rhs) is needed whenever more than one iteration runs, when the
     initial guess is nonzero (~flag & SOR_ZERO_INITIAL_GUESS), or for Eisenstat */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather the off-process entries of xx needed by the off-diagonal block */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily build and cache the diagonal used by the Eisenstat trick */
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   Forms B = P*A*Q for permutations rowp (rows) and colp (columns) by building
   local index sets for the inverse permutations and taking a submatrix.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  /* NOTE(review): growp does not appear to be destroyed anywhere in this
     function — looks like a leak; confirm against ISAllGather ownership */
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   Sums the info of the diagonal (A) and off-diagonal (B) blocks, then either
   returns the local totals or reduces them (MAX or SUM) across the communicator.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/* Forwards options to the A and/or B sub-blocks as appropriate for each option */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* NOTE(review): flg is ignored here — this sets donotstash unconditionally,
       so the option cannot be turned back off; confirm whether flg was intended */
    a->donotstash = PETSC_TRUE;
    break;
  /* symmetry options apply only to the local diagonal block */
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally-owned row with global column indices, merging the A
   (diagonal) and B (off-diagonal) blocks in increasing column order.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* null out the work pointers the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column is below the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if
(cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* Companion to MatGetRow_MPIAIJ(); simply clears the active flag */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Frobenius: sum of squares over both blocks, MPI_SUM reduce, then sqrt.
   NORM_1 (max column sum): accumulate per-global-column, MPI_SUM reduce, take max.
   NORM_INFINITY (max row sum): local row sums over both blocks, MPI_MAX reduce.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps the B block's compressed column indices to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the explicit transpose by inserting each local row as a column of the
   result via MatSetValues(); preallocation of B is estimated from the A block.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: each row of A becomes a column of B */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr =
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1686 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1687 array = Bloc->a; 1688 row = A->rmap->rstart; 1689 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1690 cols_tmp = cols; 1691 for (i=0; i<mb; i++) { 1692 ncol = bi[i+1]-bi[i]; 1693 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1694 row++; array += ncol; cols_tmp += ncol; 1695 } 1696 ierr = PetscFree(cols);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1701 *matout = B; 1702 } else { 1703 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1713 Mat a = aij->A,b = aij->B; 1714 PetscErrorCode ierr; 1715 PetscInt s1,s2,s3; 1716 1717 PetscFunctionBegin; 1718 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1719 if (rr) { 1720 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1721 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1722 /* Overlap communication with computation. 
*/ 1723 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 } 1725 if (ll) { 1726 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1727 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1728 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1729 } 1730 /* scale the diagonal block */ 1731 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1732 1733 if (rr) { 1734 /* Do a scatter end and then right scale the off-diagonal block */ 1735 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1737 } 1738 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1751 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1752 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1759 { 1760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1761 PetscErrorCode ierr; 1762 1763 PetscFunctionBegin; 1764 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatEqual_MPIAIJ" 1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1771 { 1772 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1773 Mat a,b,c,d; 1774 PetscTruth flg; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 a = matA->A; b = matA->B; 1779 c = matB->A; d = matB->B; 1780 1781 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1782 if (flg) { 1783 ierr 
= MatEqual(b,d,&flg);CHKERRQ(ierr); 1784 } 1785 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatCopy_MPIAIJ" 1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1792 { 1793 PetscErrorCode ierr; 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1795 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 /* because of the column compression in the off-processor part of the matrix a->B, 1801 the number of columns in a->B and b->B may be different, hence we cannot call 1802 the MatCopy() directly on the two parts. If need be, we can provide a more 1803 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1804 then copying the submatrices */ 1805 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1808 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1816 { 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #include "petscblaslapack.h" 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 
1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include <boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map->hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new 
lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1967 ierr = PetscContainerSetPointer(c, lgraph_p); 1968 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, 
&array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = 
PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt 
tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; 
i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for (l=0; l<nzB; l++) { 2227 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + 
nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = 
MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 /*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* 
create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == 
MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 
2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->cmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 
PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, 
offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ 
MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 0 
2719 }; 2720 2721 /* ----------------------------------------------------------------------------------------*/ 2722 2723 EXTERN_C_BEGIN 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 EXTERN_C_END 2737 2738 EXTERN_C_BEGIN 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2742 { 2743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2748 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 EXTERN_C_END 2752 2753 #include "petscpc.h" 2754 EXTERN_C_BEGIN 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2758 { 2759 Mat_MPIAIJ *b; 2760 PetscErrorCode ierr; 2761 PetscInt i; 2762 2763 PetscFunctionBegin; 2764 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2765 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2766 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2767 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2768 2769 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2773 if (d_nnz) { 2774 for (i=0; i<B->rmap->n; i++) { 2775 if 
(d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2776 } 2777 } 2778 if (o_nnz) { 2779 for (i=0; i<B->rmap->n; i++) { 2780 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2781 } 2782 } 2783 b = (Mat_MPIAIJ*)B->data; 2784 2785 if (!B->preallocated) { 2786 /* Explicitly create 2 MATSEQAIJ matrices. */ 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2789 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2791 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2792 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2793 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2795 } 2796 2797 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2798 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2799 B->preallocated = PETSC_TRUE; 2800 PetscFunctionReturn(0); 2801 } 2802 EXTERN_C_END 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2806 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2807 { 2808 Mat mat; 2809 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 *newmat = 0; 2814 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2815 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2816 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2817 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2818 a = (Mat_MPIAIJ*)mat->data; 2819 2820 mat->factor = matin->factor; 2821 mat->rmap->bs = matin->rmap->bs; 2822 mat->assembled = PETSC_TRUE; 2823 
mat->insertmode = NOT_SET_VALUES; 2824 mat->preallocated = PETSC_TRUE; 2825 2826 a->size = oldmat->size; 2827 a->rank = oldmat->rank; 2828 a->donotstash = oldmat->donotstash; 2829 a->roworiented = oldmat->roworiented; 2830 a->rowindices = 0; 2831 a->rowvalues = 0; 2832 a->getrowactive = PETSC_FALSE; 2833 2834 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2835 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2836 2837 if (oldmat->colmap) { 2838 #if defined (PETSC_USE_CTABLE) 2839 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2840 #else 2841 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2842 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2843 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2844 #endif 2845 } else a->colmap = 0; 2846 if (oldmat->garray) { 2847 PetscInt len; 2848 len = oldmat->B->cmap->n; 2849 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2850 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2851 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2852 } else a->garray = 0; 2853 2854 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2856 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2858 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2860 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2862 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2863 *newmat = mat; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatLoad_MPIAIJ" 2869 
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2870 { 2871 Mat A; 2872 PetscScalar *vals,*svals; 2873 MPI_Comm comm = ((PetscObject)viewer)->comm; 2874 MPI_Status status; 2875 PetscErrorCode ierr; 2876 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2877 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2878 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2879 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2880 PetscInt cend,cstart,n,*rowners; 2881 int fd; 2882 2883 PetscFunctionBegin; 2884 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2885 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2886 if (!rank) { 2887 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2888 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2889 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2890 } 2891 2892 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2893 M = header[1]; N = header[2]; 2894 /* determine ownership of all rows */ 2895 m = M/size + ((M % size) > rank); 2896 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2897 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2898 2899 /* First process needs enough room for process with most rows */ 2900 if (!rank) { 2901 mmax = rowners[1]; 2902 for (i=2; i<size; i++) { 2903 mmax = PetscMax(mmax,rowners[i]); 2904 } 2905 } else mmax = m; 2906 2907 rowners[0] = 0; 2908 for (i=2; i<=size; i++) { 2909 rowners[i] += rowners[i-1]; 2910 } 2911 rstart = rowners[rank]; 2912 rend = rowners[rank+1]; 2913 2914 /* distribute row lengths to all processors */ 2915 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2916 if (!rank) { 2917 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2918 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2919 ierr = 
PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2920 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2921 for (j=0; j<m; j++) { 2922 procsnz[0] += ourlens[j]; 2923 } 2924 for (i=1; i<size; i++) { 2925 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2926 /* calculate the number of nonzeros on each processor */ 2927 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2928 procsnz[i] += rowlengths[j]; 2929 } 2930 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2931 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2932 } 2933 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2934 } else { 2935 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2936 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2937 } 2938 2939 if (!rank) { 2940 /* determine max buffer needed and allocate it */ 2941 maxnz = 0; 2942 for (i=0; i<size; i++) { 2943 maxnz = PetscMax(maxnz,procsnz[i]); 2944 } 2945 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2946 2947 /* read in my part of the matrix column indices */ 2948 nz = procsnz[0]; 2949 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2950 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2951 2952 /* read in every one elses and ship off */ 2953 for (i=1; i<size; i++) { 2954 nz = procsnz[i]; 2955 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2956 mpicnt = PetscMPIIntCast(nz); 2957 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2958 } 2959 ierr = PetscFree(cols);CHKERRQ(ierr); 2960 } else { 2961 /* determine buffer space needed for message */ 2962 nz = 0; 2963 for (i=0; i<m; i++) { 2964 nz += ourlens[i]; 2965 } 2966 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2967 2968 /* receive message of column indices*/ 2969 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2970 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2971 ierr = 
MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2972 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2973 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2974 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2975 } 2976 2977 /* determine column ownership if matrix is not square */ 2978 if (N != M) { 2979 n = N/size + ((N % size) > rank); 2980 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2981 cstart = cend - n; 2982 } else { 2983 cstart = rstart; 2984 cend = rend; 2985 n = cend - cstart; 2986 } 2987 2988 /* loop over local rows, determining number of off diagonal entries */ 2989 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2990 jj = 0; 2991 for (i=0; i<m; i++) { 2992 for (j=0; j<ourlens[i]; j++) { 2993 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2994 jj++; 2995 } 2996 } 2997 2998 /* create our matrix */ 2999 for (i=0; i<m; i++) { 3000 ourlens[i] -= offlens[i]; 3001 } 3002 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3003 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3004 ierr = MatSetType(A,type);CHKERRQ(ierr); 3005 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3006 3007 for (i=0; i<m; i++) { 3008 ourlens[i] += offlens[i]; 3009 } 3010 3011 if (!rank) { 3012 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3013 3014 /* read in my part of the matrix numerical values */ 3015 nz = procsnz[0]; 3016 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3017 3018 /* insert into matrix */ 3019 jj = rstart; 3020 smycols = mycols; 3021 svals = vals; 3022 for (i=0; i<m; i++) { 3023 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3024 smycols += ourlens[i]; 3025 svals += 
ourlens[i]; 3026 jj++; 3027 } 3028 3029 /* read in other processors and ship out */ 3030 for (i=1; i<size; i++) { 3031 nz = procsnz[i]; 3032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3033 mpicnt = PetscMPIIntCast(nz); 3034 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3035 } 3036 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3037 } else { 3038 /* receive numeric values */ 3039 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3040 3041 /* receive message of values*/ 3042 mpicnt = PetscMPIIntCast(nz); 3043 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3044 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3045 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3046 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3047 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3048 3049 /* insert into matrix */ 3050 jj = rstart; 3051 smycols = mycols; 3052 svals = vals; 3053 for (i=0; i<m; i++) { 3054 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3055 smycols += ourlens[i]; 3056 svals += ourlens[i]; 3057 jj++; 3058 } 3059 } 3060 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3061 ierr = PetscFree(vals);CHKERRQ(ierr); 3062 ierr = PetscFree(mycols);CHKERRQ(ierr); 3063 ierr = PetscFree(rowners);CHKERRQ(ierr); 3064 3065 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 *newmat = A; 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat 
*newmat) 3074 { 3075 PetscErrorCode ierr; 3076 IS iscol_local; 3077 PetscInt csize; 3078 3079 PetscFunctionBegin; 3080 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3081 if (call == MAT_REUSE_MATRIX) { 3082 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3083 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3084 } else { 3085 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3086 } 3087 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3088 if (call == MAT_INITIAL_MATRIX) { 3089 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3090 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3091 } 3092 PetscFunctionReturn(0); 3093 } 3094 3095 #undef __FUNCT__ 3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3097 /* 3098 Not great since it makes two copies of the submatrix, first an SeqAIJ 3099 in local and then by concatenating the local matrices the end result. 3100 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3101 3102 Note: This requires a sequential iscol with all indices. 
3103 */ 3104 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3105 { 3106 PetscErrorCode ierr; 3107 PetscMPIInt rank,size; 3108 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3109 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3110 Mat *local,M,Mreuse; 3111 MatScalar *vwork,*aa; 3112 MPI_Comm comm = ((PetscObject)mat)->comm; 3113 Mat_SeqAIJ *aij; 3114 3115 3116 PetscFunctionBegin; 3117 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3118 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3119 3120 if (call == MAT_REUSE_MATRIX) { 3121 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3122 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3123 local = &Mreuse; 3124 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3125 } else { 3126 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3127 Mreuse = *local; 3128 ierr = PetscFree(local);CHKERRQ(ierr); 3129 } 3130 3131 /* 3132 m - number of local rows 3133 n - number of columns (same on all processors) 3134 rstart - first row in new global matrix generated 3135 */ 3136 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3137 if (call == MAT_INITIAL_MATRIX) { 3138 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3139 ii = aij->i; 3140 jj = aij->j; 3141 3142 /* 3143 Determine the number of non-zeros in the diagonal and off-diagonal 3144 portions of the matrix in order to do correct preallocation 3145 */ 3146 3147 /* first get start and end of "diagonal" columns */ 3148 if (csize == PETSC_DECIDE) { 3149 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3150 if (mglobal == n) { /* square matrix */ 3151 nlocal = m; 3152 } else { 3153 nlocal = n/size + ((n % size) > rank); 3154 } 3155 } else { 3156 nlocal = csize; 3157 } 3158 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3159 rstart = 
rend - nlocal; 3160 if (rank == size - 1 && rend != n) { 3161 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3162 } 3163 3164 /* next, compute all the lengths */ 3165 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3166 olens = dlens + m; 3167 for (i=0; i<m; i++) { 3168 jend = ii[i+1] - ii[i]; 3169 olen = 0; 3170 dlen = 0; 3171 for (j=0; j<jend; j++) { 3172 if (*jj < rstart || *jj >= rend) olen++; 3173 else dlen++; 3174 jj++; 3175 } 3176 olens[i] = olen; 3177 dlens[i] = dlen; 3178 } 3179 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3180 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3181 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3182 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3183 ierr = PetscFree(dlens);CHKERRQ(ierr); 3184 } else { 3185 PetscInt ml,nl; 3186 3187 M = *newmat; 3188 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3189 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3190 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3191 /* 3192 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3193 rather than the slower MatSetValues(). 
3194 */ 3195 M->was_assembled = PETSC_TRUE; 3196 M->assembled = PETSC_FALSE; 3197 } 3198 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3199 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3200 ii = aij->i; 3201 jj = aij->j; 3202 aa = aij->a; 3203 for (i=0; i<m; i++) { 3204 row = rstart + i; 3205 nz = ii[i+1] - ii[i]; 3206 cwork = jj; jj += nz; 3207 vwork = aa; aa += nz; 3208 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3209 } 3210 3211 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3212 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3213 *newmat = M; 3214 3215 /* save submatrix used in processor for next request */ 3216 if (call == MAT_INITIAL_MATRIX) { 3217 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3218 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3219 } 3220 3221 PetscFunctionReturn(0); 3222 } 3223 3224 EXTERN_C_BEGIN 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3227 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3228 { 3229 PetscInt m,cstart, cend,j,nnz,i,d; 3230 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3231 const PetscInt *JJ; 3232 PetscScalar *values; 3233 PetscErrorCode ierr; 3234 3235 PetscFunctionBegin; 3236 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3237 3238 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3239 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3240 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3241 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3242 m = B->rmap->n; 3243 cstart = B->cmap->rstart; 3244 cend = B->cmap->rend; 3245 rstart = B->rmap->rstart; 3246 3247 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3248 3249 #if defined(PETSC_USE_DEBUGGING) 3250 for (i=0; i<m; i++) { 3251 nnz = Ii[i+1]- 
Ii[i]; 3252 JJ = J + Ii[i]; 3253 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3254 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3255 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3256 } 3257 #endif 3258 3259 for (i=0; i<m; i++) { 3260 nnz = Ii[i+1]- Ii[i]; 3261 JJ = J + Ii[i]; 3262 nnz_max = PetscMax(nnz_max,nnz); 3263 d = 0; 3264 for (j=0; j<nnz; j++) { 3265 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3266 } 3267 d_nnz[i] = d; 3268 o_nnz[i] = nnz - d; 3269 } 3270 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3271 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3272 3273 if (v) values = (PetscScalar*)v; 3274 else { 3275 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3276 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3277 } 3278 3279 for (i=0; i<m; i++) { 3280 ii = i + rstart; 3281 nnz = Ii[i+1]- Ii[i]; 3282 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3283 } 3284 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3285 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 3287 if (!v) { 3288 ierr = PetscFree(values);CHKERRQ(ierr); 3289 } 3290 PetscFunctionReturn(0); 3291 } 3292 EXTERN_C_END 3293 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3296 /*@ 3297 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3298 (the default parallel PETSc format). 3299 3300 Collective on MPI_Comm 3301 3302 Input Parameters: 3303 + B - the matrix 3304 . i - the indices into j for the start of each local row (starts with zero) 3305 . 
j - the column indices for each local row (starts with zero) 3306 - v - optional values in the matrix 3307 3308 Level: developer 3309 3310 Notes: 3311 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3312 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3313 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3314 3315 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3316 3317 The format which is used for the sparse matrix input, is equivalent to a 3318 row-major ordering.. i.e for the following matrix, the input data expected is 3319 as shown: 3320 3321 1 0 0 3322 2 0 3 P0 3323 ------- 3324 4 5 6 P1 3325 3326 Process0 [P0]: rows_owned=[0,1] 3327 i = {0,1,3} [size = nrow+1 = 2+1] 3328 j = {0,0,2} [size = nz = 6] 3329 v = {1,2,3} [size = nz = 6] 3330 3331 Process1 [P1]: rows_owned=[2] 3332 i = {0,3} [size = nrow+1 = 1+1] 3333 j = {0,1,2} [size = nz = 6] 3334 v = {4,5,6} [size = nz = 6] 3335 3336 .keywords: matrix, aij, compressed row, sparse, parallel 3337 3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3339 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3340 @*/ 3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3342 { 3343 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3344 3345 PetscFunctionBegin; 3346 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3347 if (f) { 3348 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3349 } 3350 PetscFunctionReturn(0); 3351 } 3352 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3355 /*@C 3356 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 
3357 (the default parallel PETSc format). For good matrix assembly performance 3358 the user should preallocate the matrix storage by setting the parameters 3359 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3360 performance can be increased by more than a factor of 50. 3361 3362 Collective on MPI_Comm 3363 3364 Input Parameters: 3365 + A - the matrix 3366 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3367 (same value is used for all local rows) 3368 . d_nnz - array containing the number of nonzeros in the various rows of the 3369 DIAGONAL portion of the local submatrix (possibly different for each row) 3370 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3371 The size of this array is equal to the number of local rows, i.e 'm'. 3372 You must leave room for the diagonal entry even if it is zero. 3373 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3374 submatrix (same value is used for all local rows). 3375 - o_nnz - array containing the number of nonzeros in the various rows of the 3376 OFF-DIAGONAL portion of the local submatrix (possibly different for 3377 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3378 structure. The size of this array is equal to the number 3379 of local rows, i.e 'm'. 3380 3381 If the *_nnz parameter is given then the *_nz parameter is ignored 3382 3383 The AIJ format (also called the Yale sparse matrix format or 3384 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3385 storage. The stored row and column indices begin with zero. See the users manual for details. 3386 3387 The parallel matrix is partitioned such that the first m0 rows belong to 3388 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3389 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3390 3391 The DIAGONAL portion of the local submatrix of a processor can be defined 3392 as the submatrix which is obtained by extraction the part corresponding 3393 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3394 first row that belongs to the processor, and r2 is the last row belonging 3395 to the this processor. This is a square mxm matrix. The remaining portion 3396 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3397 3398 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3399 3400 You can call MatGetInfo() to get information on how effective the preallocation was; 3401 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3402 You can also run with the option -info and look for messages with the string 3403 malloc in them to see if additional memory allocation was needed. 3404 3405 Example usage: 3406 3407 Consider the following 8x8 matrix with 34 non-zero values, that is 3408 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3409 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3410 as follows: 3411 3412 .vb 3413 1 2 0 | 0 3 0 | 0 4 3414 Proc0 0 5 6 | 7 0 0 | 8 0 3415 9 0 10 | 11 0 0 | 12 0 3416 ------------------------------------- 3417 13 0 14 | 15 16 17 | 0 0 3418 Proc1 0 18 0 | 19 20 21 | 0 0 3419 0 0 0 | 22 23 0 | 24 0 3420 ------------------------------------- 3421 Proc2 25 26 27 | 0 0 28 | 29 0 3422 30 0 0 | 31 32 33 | 0 34 3423 .ve 3424 3425 This can be represented as a collection of submatrices as: 3426 3427 .vb 3428 A B C 3429 D E F 3430 G H I 3431 .ve 3432 3433 Where the submatrices A,B,C are owned by proc0, D,E,F are 3434 owned by proc1, G,H,I are owned by proc2. 3435 3436 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3437 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3438 The 'M','N' parameters are 8,8, and have the same values on all procs. 
   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
3473 3474 Level: intermediate 3475 3476 .keywords: matrix, aij, compressed row, sparse, parallel 3477 3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3479 MPIAIJ, MatGetInfo() 3480 @*/ 3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3482 { 3483 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3484 3485 PetscFunctionBegin; 3486 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3487 if (f) { 3488 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3489 } 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3495 /*@ 3496 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3497 CSR format the local rows. 3498 3499 Collective on MPI_Comm 3500 3501 Input Parameters: 3502 + comm - MPI communicator 3503 . m - number of local rows (Cannot be PETSC_DECIDE) 3504 . n - This value should be the same as the local size used in creating the 3505 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3506 calculated if N is given) For square matrices n is almost always m. 3507 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3508 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3509 . i - row indices 3510 . j - column indices 3511 - a - matrix values 3512 3513 Output Parameter: 3514 . mat - the matrix 3515 3516 Level: intermediate 3517 3518 Notes: 3519 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3520 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3521 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3522 3523 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3524 3525 The format which is used for the sparse matrix input, is equivalent to a 3526 row-major ordering.. i.e for the following matrix, the input data expected is 3527 as shown: 3528 3529 1 0 0 3530 2 0 3 P0 3531 ------- 3532 4 5 6 P1 3533 3534 Process0 [P0]: rows_owned=[0,1] 3535 i = {0,1,3} [size = nrow+1 = 2+1] 3536 j = {0,0,2} [size = nz = 6] 3537 v = {1,2,3} [size = nz = 6] 3538 3539 Process1 [P1]: rows_owned=[2] 3540 i = {0,3} [size = nrow+1 = 1+1] 3541 j = {0,1,2} [size = nz = 6] 3542 v = {4,5,6} [size = nz = 6] 3543 3544 .keywords: matrix, aij, compressed row, sparse, parallel 3545 3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3547 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3548 @*/ 3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3550 { 3551 PetscErrorCode ierr; 3552 3553 PetscFunctionBegin; 3554 if (i[0]) { 3555 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3556 } 3557 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3558 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3560 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3561 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatCreateMPIAIJ" 3567 /*@C 3568 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3569 (the default parallel PETSc format). For good matrix assembly performance 3570 the user should preallocate the matrix storage by setting the parameters 3571 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3572 performance can be increased by more than a factor of 50. 3573 3574 Collective on MPI_Comm 3575 3576 Input Parameters: 3577 + comm - MPI communicator 3578 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3579 This value should be the same as the local size used in creating the 3580 y vector for the matrix-vector product y = Ax. 3581 . n - This value should be the same as the local size used in creating the 3582 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3583 calculated if N is given) For square matrices n is almost always m. 3584 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3585 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3586 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3587 (same value is used for all local rows) 3588 . d_nnz - array containing the number of nonzeros in the various rows of the 3589 DIAGONAL portion of the local submatrix (possibly different for each row) 3590 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3591 The size of this array is equal to the number of local rows, i.e 'm'. 3592 You must leave room for the diagonal entry even if it is zero. 3593 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3594 submatrix (same value is used for all local rows). 3595 - o_nnz - array containing the number of nonzeros in the various rows of the 3596 OFF-DIAGONAL portion of the local submatrix (possibly different for 3597 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3598 structure. The size of this array is equal to the number 3599 of local rows, i.e 'm'. 3600 3601 Output Parameter: 3602 . A - the matrix 3603 3604 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3605 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
3606 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3607 3608 Notes: 3609 If the *_nnz parameter is given then the *_nz parameter is ignored 3610 3611 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3612 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3613 storage requirements for this matrix. 3614 3615 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3616 processor than it must be used on all processors that share the object for 3617 that argument. 3618 3619 The user MUST specify either the local or global matrix dimensions 3620 (possibly both). 3621 3622 The parallel matrix is partitioned across processors such that the 3623 first m0 rows belong to process 0, the next m1 rows belong to 3624 process 1, the next m2 rows belong to process 2 etc.. where 3625 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3626 values corresponding to [m x N] submatrix. 3627 3628 The columns are logically partitioned with the n0 columns belonging 3629 to 0th partition, the next n1 columns belonging to the next 3630 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3631 3632 The DIAGONAL portion of the local submatrix on any given processor 3633 is the submatrix corresponding to the rows and columns m,n 3634 corresponding to the given processor. i.e diagonal matrix on 3635 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3636 etc. The remaining portion of the local submatrix [m x (N-n)] 3637 constitute the OFF-DIAGONAL portion. The example below better 3638 illustrates this concept. 3639 3640 For a square global matrix we define each processor's diagonal portion 3641 to be its local rows and the corresponding columns (a square submatrix); 3642 each processor's off-diagonal portion encompasses the remainder of the 3643 local matrix (a rectangular submatrix). 
3644 3645 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3646 3647 When calling this routine with a single process communicator, a matrix of 3648 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3649 type of communicator, use the construction mechanism: 3650 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3651 3652 By default, this format uses inodes (identical nodes) when possible. 3653 We search for consecutive rows with the same nonzero structure, thereby 3654 reusing matrix information to achieve increased efficiency. 3655 3656 Options Database Keys: 3657 + -mat_no_inode - Do not use inodes 3658 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3659 - -mat_aij_oneindex - Internally use indexing starting at 1 3660 rather than 0. Note that when calling MatSetValues(), 3661 the user still MUST index entries starting at 0! 3662 3663 3664 Example usage: 3665 3666 Consider the following 8x8 matrix with 34 non-zero values, that is 3667 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3668 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3669 as follows: 3670 3671 .vb 3672 1 2 0 | 0 3 0 | 0 4 3673 Proc0 0 5 6 | 7 0 0 | 8 0 3674 9 0 10 | 11 0 0 | 12 0 3675 ------------------------------------- 3676 13 0 14 | 15 16 17 | 0 0 3677 Proc1 0 18 0 | 19 20 21 | 0 0 3678 0 0 0 | 22 23 0 | 24 0 3679 ------------------------------------- 3680 Proc2 25 26 27 | 0 0 28 | 29 0 3681 30 0 0 | 31 32 33 | 0 34 3682 .ve 3683 3684 This can be represented as a collection of submatrices as: 3685 3686 .vb 3687 A B C 3688 D E F 3689 G H I 3690 .ve 3691 3692 Where the submatrices A,B,C are owned by proc0, D,E,F are 3693 owned by proc1, G,H,I are owned by proc2. 3694 3695 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3696 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
3697 The 'M','N' parameters are 8,8, and have the same values on all procs. 3698 3699 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3700 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3701 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3702 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3703 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3704 matrix, ans [DF] as another SeqAIJ matrix. 3705 3706 When d_nz, o_nz parameters are specified, d_nz storage elements are 3707 allocated for every row of the local diagonal submatrix, and o_nz 3708 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3709 One way to choose d_nz and o_nz is to use the max nonzerors per local 3710 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3711 In this case, the values of d_nz,o_nz are: 3712 .vb 3713 proc0 : dnz = 2, o_nz = 2 3714 proc1 : dnz = 3, o_nz = 2 3715 proc2 : dnz = 1, o_nz = 4 3716 .ve 3717 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3718 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3719 for proc3. i.e we are using 12+15+10=37 storage locations to store 3720 34 values. 3721 3722 When d_nnz, o_nnz parameters are specified, the storage is specified 3723 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3724 In the above case the values for d_nnz,o_nnz are: 3725 .vb 3726 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3727 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3728 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3729 .ve 3730 Here the space allocated is sum of all the above values i.e 34, and 3731 hence pre-allocation is perfect. 
3732 3733 Level: intermediate 3734 3735 .keywords: matrix, aij, compressed row, sparse, parallel 3736 3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3738 MPIAIJ, MatCreateMPIAIJWithArrays() 3739 @*/ 3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3741 { 3742 PetscErrorCode ierr; 3743 PetscMPIInt size; 3744 3745 PetscFunctionBegin; 3746 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3747 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3748 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3749 if (size > 1) { 3750 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3751 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3752 } else { 3753 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3754 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3755 } 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3762 { 3763 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3764 3765 PetscFunctionBegin; 3766 *Ad = a->A; 3767 *Ao = a->B; 3768 *colmap = a->garray; 3769 PetscFunctionReturn(0); 3770 } 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3775 { 3776 PetscErrorCode ierr; 3777 PetscInt i; 3778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3779 3780 PetscFunctionBegin; 3781 if (coloring->ctype == IS_COLORING_GLOBAL) { 3782 ISColoringValue *allcolors,*colors; 3783 ISColoring ocoloring; 3784 3785 /* set coloring for diagonal portion */ 3786 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3787 3788 /* set coloring for off-diagonal portion */ 3789 ierr = 
ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3790 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3791 for (i=0; i<a->B->cmap->n; i++) { 3792 colors[i] = allcolors[a->garray[i]]; 3793 } 3794 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3795 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3796 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3797 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3798 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3799 ISColoringValue *colors; 3800 PetscInt *larray; 3801 ISColoring ocoloring; 3802 3803 /* set coloring for diagonal portion */ 3804 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3805 for (i=0; i<a->A->cmap->n; i++) { 3806 larray[i] = i + A->cmap->rstart; 3807 } 3808 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3809 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3810 for (i=0; i<a->A->cmap->n; i++) { 3811 colors[i] = coloring->colors[larray[i]]; 3812 } 3813 ierr = PetscFree(larray);CHKERRQ(ierr); 3814 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3815 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3816 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3817 3818 /* set coloring for off-diagonal portion */ 3819 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3820 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3821 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3822 for (i=0; i<a->B->cmap->n; i++) { 3823 colors[i] = coloring->colors[larray[i]]; 3824 } 3825 ierr = PetscFree(larray);CHKERRQ(ierr); 3826 ierr = 
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3827 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3828 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3829 } else { 3830 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3831 } 3832 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #if defined(PETSC_HAVE_ADIC) 3837 #undef __FUNCT__ 3838 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3839 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3840 { 3841 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3842 PetscErrorCode ierr; 3843 3844 PetscFunctionBegin; 3845 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3846 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3847 PetscFunctionReturn(0); 3848 } 3849 #endif 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3854 { 3855 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3856 PetscErrorCode ierr; 3857 3858 PetscFunctionBegin; 3859 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3860 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3861 PetscFunctionReturn(0); 3862 } 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatMerge" 3866 /*@ 3867 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3868 matrices from each processor 3869 3870 Collective on MPI_Comm 3871 3872 Input Parameters: 3873 + comm - the communicators the parallel matrix will live on 3874 . inmat - the input sequential matrices 3875 . n - number of local columns (or PETSC_DECIDE) 3876 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3877 3878 Output Parameter: 3879 . outmat - the parallel matrix generated 3880 3881 Level: advanced 3882 3883 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
3884 3885 @*/ 3886 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3887 { 3888 PetscErrorCode ierr; 3889 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3890 PetscInt *indx; 3891 PetscScalar *values; 3892 3893 PetscFunctionBegin; 3894 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3895 if (scall == MAT_INITIAL_MATRIX){ 3896 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3897 if (n == PETSC_DECIDE){ 3898 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3899 } 3900 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3901 rstart -= m; 3902 3903 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3904 for (i=0;i<m;i++) { 3905 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3906 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3907 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3908 } 3909 /* This routine will ONLY return MPIAIJ type matrix */ 3910 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3911 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3912 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3913 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3914 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3915 3916 } else if (scall == MAT_REUSE_MATRIX){ 3917 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3918 } else { 3919 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3920 } 3921 3922 for (i=0;i<m;i++) { 3923 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3924 Ii = i + rstart; 3925 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3926 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3927 } 3928 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3929 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3930 ierr = 
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 3932 PetscFunctionReturn(0); 3933 } 3934 3935 #undef __FUNCT__ 3936 #define __FUNCT__ "MatFileSplit" 3937 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3938 { 3939 PetscErrorCode ierr; 3940 PetscMPIInt rank; 3941 PetscInt m,N,i,rstart,nnz; 3942 size_t len; 3943 const PetscInt *indx; 3944 PetscViewer out; 3945 char *name; 3946 Mat B; 3947 const PetscScalar *values; 3948 3949 PetscFunctionBegin; 3950 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3951 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3952 /* Should this be the type of the diagonal block of A? */ 3953 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3954 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3955 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3956 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3957 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3958 for (i=0;i<m;i++) { 3959 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3960 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3961 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3962 } 3963 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3964 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3965 3966 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3967 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3968 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3969 sprintf(name,"%s.%d",outfile,rank); 3970 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3971 ierr = PetscFree(name); 3972 ierr = MatView(B,out);CHKERRQ(ierr); 3973 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3974 ierr = MatDestroy(B);CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977 3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3979 #undef __FUNCT__ 3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3981 PetscErrorCode PETSCMAT_DLLEXPORT 
MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3982 { 3983 PetscErrorCode ierr; 3984 Mat_Merge_SeqsToMPI *merge; 3985 PetscContainer container; 3986 3987 PetscFunctionBegin; 3988 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3989 if (container) { 3990 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4003 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4004 4005 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4006 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4007 } 4008 ierr = PetscFree(merge);CHKERRQ(ierr); 4009 4010 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4011 PetscFunctionReturn(0); 4012 } 4013 4014 #include "../src/mat/utils/freespace.h" 4015 #include "petscbt.h" 4016 4017 #undef __FUNCT__ 4018 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4019 /*@C 4020 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4021 matrices from each processor 4022 4023 Collective on MPI_Comm 4024 4025 Input Parameters: 4026 + comm - the communicators the parallel matrix will live on 4027 . seqmat - the input sequential matrices 4028 . m - number of local rows (or PETSC_DECIDE) 4029 . n - number of local columns (or PETSC_DECIDE) 4030 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4031 4032 Output Parameter: 4033 . 
mpimat - the parallel matrix generated 4034 4035 Level: advanced 4036 4037 Notes: 4038 The dimensions of the sequential matrix in each processor MUST be the same. 4039 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4040 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4041 @*/ 4042 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4043 { 4044 PetscErrorCode ierr; 4045 MPI_Comm comm=((PetscObject)mpimat)->comm; 4046 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4047 PetscMPIInt size,rank,taga,*len_s; 4048 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4049 PetscInt proc,m; 4050 PetscInt **buf_ri,**buf_rj; 4051 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4052 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4053 MPI_Request *s_waits,*r_waits; 4054 MPI_Status *status; 4055 MatScalar *aa=a->a; 4056 MatScalar **abuf_r,*ba_i; 4057 Mat_Merge_SeqsToMPI *merge; 4058 PetscContainer container; 4059 4060 PetscFunctionBegin; 4061 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4062 4063 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4064 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4065 4066 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4067 if (container) { 4068 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4069 } 4070 bi = merge->bi; 4071 bj = merge->bj; 4072 buf_ri = merge->buf_ri; 4073 buf_rj = merge->buf_rj; 4074 4075 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4076 owners = merge->rowmap->range; 4077 len_s = merge->len_s; 4078 4079 /* send and recv matrix values */ 4080 /*-----------------------------*/ 4081 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4082 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4083 4084 ierr = 
PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4085 for (proc=0,k=0; proc<size; proc++){ 4086 if (!len_s[proc]) continue; 4087 i = owners[proc]; 4088 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4089 k++; 4090 } 4091 4092 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4093 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4094 ierr = PetscFree(status);CHKERRQ(ierr); 4095 4096 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4097 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4098 4099 /* insert mat values of mpimat */ 4100 /*----------------------------*/ 4101 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4102 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4103 4104 for (k=0; k<merge->nrecv; k++){ 4105 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4106 nrows = *(buf_ri_k[k]); 4107 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4108 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4109 } 4110 4111 /* set values of ba */ 4112 m = merge->rowmap->n; 4113 for (i=0; i<m; i++) { 4114 arow = owners[rank] + i; 4115 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4116 bnzi = bi[i+1] - bi[i]; 4117 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4118 4119 /* add local non-zero vals of this proc's seqmat into ba */ 4120 anzi = ai[arow+1] - ai[arow]; 4121 aj = a->j + ai[arow]; 4122 aa = a->a + ai[arow]; 4123 nextaj = 0; 4124 for (j=0; nextaj<anzi; j++){ 4125 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4126 ba_i[j] += aa[nextaj++]; 4127 } 4128 } 4129 4130 /* add received vals into ba */ 4131 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4132 /* i-th row */ 4133 if (i == *nextrow[k]) { 4134 anzi = 
*(nextai[k]+1) - *nextai[k]; 4135 aj = buf_rj[k] + *(nextai[k]); 4136 aa = abuf_r[k] + *(nextai[k]); 4137 nextaj = 0; 4138 for (j=0; nextaj<anzi; j++){ 4139 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4140 ba_i[j] += aa[nextaj++]; 4141 } 4142 } 4143 nextrow[k]++; nextai[k]++; 4144 } 4145 } 4146 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4147 } 4148 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 4151 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4152 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4153 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4154 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4155 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4156 PetscFunctionReturn(0); 4157 } 4158 4159 #undef __FUNCT__ 4160 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4161 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4162 { 4163 PetscErrorCode ierr; 4164 Mat B_mpi; 4165 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4166 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4167 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4168 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4169 PetscInt len,proc,*dnz,*onz; 4170 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4171 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4172 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4173 MPI_Status *status; 4174 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4175 PetscBT lnkbt; 4176 Mat_Merge_SeqsToMPI *merge; 4177 PetscContainer container; 4178 4179 PetscFunctionBegin; 4180 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4181 4182 /* make sure it is a PETSc comm */ 4183 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4184 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4185 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4186 4187 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4188 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4189 4190 /* determine row ownership */ 4191 /*---------------------------------------------------------*/ 4192 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4193 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4194 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4195 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4196 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4197 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4198 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4199 4200 m = merge->rowmap->n; 4201 M = merge->rowmap->N; 4202 owners = merge->rowmap->range; 4203 4204 /* determine the number of messages to send, their lengths */ 4205 /*---------------------------------------------------------*/ 4206 len_s = merge->len_s; 4207 4208 len = 0; /* length of buf_si[] */ 4209 merge->nsend = 0; 4210 for (proc=0; proc<size; proc++){ 4211 len_si[proc] = 0; 4212 if (proc == rank){ 4213 len_s[proc] = 0; 4214 } else { 4215 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4216 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4217 } 4218 if (len_s[proc]) { 4219 merge->nsend++; 4220 nrows = 0; 4221 for (i=owners[proc]; i<owners[proc+1]; i++){ 4222 if (ai[i+1] > ai[i]) nrows++; 4223 } 4224 len_si[proc] = 2*(nrows+1); 4225 len += len_si[proc]; 4226 } 4227 } 4228 4229 /* determine the number and length of messages to receive for ij-structure */ 4230 /*-------------------------------------------------------------------------*/ 4231 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4232 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4233 4234 /* post the Irecv of j-structure */ 4235 /*-------------------------------*/ 4236 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4237 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4238 4239 /* post the Isend of j-structure */ 4240 /*--------------------------------*/ 4241 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4242 4243 for (proc=0, k=0; proc<size; proc++){ 4244 if (!len_s[proc]) continue; 4245 i = owners[proc]; 4246 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4247 k++; 4248 } 4249 4250 /* receives and sends of j-structure are complete */ 4251 /*------------------------------------------------*/ 4252 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4253 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4254 4255 /* send and recv i-structure */ 4256 /*---------------------------*/ 4257 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4258 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4259 4260 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4261 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4262 for (proc=0,k=0; proc<size; proc++){ 4263 if (!len_s[proc]) continue; 4264 /* form outgoing message for i-structure: 4265 buf_si[0]: nrows to be sent 4266 [1:nrows]: row index (global) 4267 [nrows+1:2*nrows+1]: i-structure index 4268 */ 4269 /*-------------------------------------------*/ 4270 nrows = len_si[proc]/2 - 1; 4271 buf_si_i = buf_si + nrows+1; 4272 buf_si[0] = nrows; 4273 buf_si_i[0] = 0; 4274 nrows = 0; 4275 for (i=owners[proc]; i<owners[proc+1]; i++){ 4276 anzi = ai[i+1] - ai[i]; 4277 if 
(anzi) { 4278 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4279 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4280 nrows++; 4281 } 4282 } 4283 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4284 k++; 4285 buf_si += len_si[proc]; 4286 } 4287 4288 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4289 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4290 4291 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4292 for (i=0; i<merge->nrecv; i++){ 4293 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4294 } 4295 4296 ierr = PetscFree(len_si);CHKERRQ(ierr); 4297 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4298 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4299 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4300 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4301 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4302 ierr = PetscFree(status);CHKERRQ(ierr); 4303 4304 /* compute a local seq matrix in each processor */ 4305 /*----------------------------------------------*/ 4306 /* allocate bi array and free space for accumulating nonzero column info */ 4307 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4308 bi[0] = 0; 4309 4310 /* create and initialize a linked list */ 4311 nlnk = N+1; 4312 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4313 4314 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4315 len = 0; 4316 len = ai[owners[rank+1]] - ai[owners[rank]]; 4317 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4318 current_space = free_space; 4319 4320 /* determine symbolic info for each local row */ 4321 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4322 4323 for (k=0; k<merge->nrecv; k++){ 4324 
buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4325 nrows = *buf_ri_k[k]; 4326 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4327 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4328 } 4329 4330 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4331 len = 0; 4332 for (i=0;i<m;i++) { 4333 bnzi = 0; 4334 /* add local non-zero cols of this proc's seqmat into lnk */ 4335 arow = owners[rank] + i; 4336 anzi = ai[arow+1] - ai[arow]; 4337 aj = a->j + ai[arow]; 4338 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4339 bnzi += nlnk; 4340 /* add received col data into lnk */ 4341 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4342 if (i == *nextrow[k]) { /* i-th row */ 4343 anzi = *(nextai[k]+1) - *nextai[k]; 4344 aj = buf_rj[k] + *nextai[k]; 4345 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4346 bnzi += nlnk; 4347 nextrow[k]++; nextai[k]++; 4348 } 4349 } 4350 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4351 4352 /* if free space is not available, make more free space */ 4353 if (current_space->local_remaining<bnzi) { 4354 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4355 nspacedouble++; 4356 } 4357 /* copy data into free space, then initialize lnk */ 4358 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4359 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4360 4361 current_space->array += bnzi; 4362 current_space->local_used += bnzi; 4363 current_space->local_remaining -= bnzi; 4364 4365 bi[i+1] = bi[i] + bnzi; 4366 } 4367 4368 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4369 4370 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4371 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4373 4374 /* create symbolic 
parallel matrix B_mpi */ 4375 /*---------------------------------------*/ 4376 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4377 if (n==PETSC_DECIDE) { 4378 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4379 } else { 4380 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4381 } 4382 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4383 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4384 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4385 4386 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4387 B_mpi->assembled = PETSC_FALSE; 4388 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4389 merge->bi = bi; 4390 merge->bj = bj; 4391 merge->buf_ri = buf_ri; 4392 merge->buf_rj = buf_rj; 4393 merge->coi = PETSC_NULL; 4394 merge->coj = PETSC_NULL; 4395 merge->owners_co = PETSC_NULL; 4396 4397 /* attach the supporting struct to B_mpi for reuse */ 4398 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4399 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4400 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4401 *mpimat = B_mpi; 4402 4403 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4404 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4405 PetscFunctionReturn(0); 4406 } 4407 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatMerge_SeqsToMPI" 4410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4411 { 4412 PetscErrorCode ierr; 4413 4414 PetscFunctionBegin; 4415 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4416 if (scall == MAT_INITIAL_MATRIX){ 4417 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4418 } 4419 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4420 ierr = 
PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4421 PetscFunctionReturn(0); 4422 } 4423 4424 #undef __FUNCT__ 4425 #define __FUNCT__ "MatGetLocalMat" 4426 /*@ 4427 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4428 4429 Not Collective 4430 4431 Input Parameters: 4432 + A - the matrix 4433 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4434 4435 Output Parameter: 4436 . A_loc - the local sequential matrix generated 4437 4438 Level: developer 4439 4440 @*/ 4441 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4442 { 4443 PetscErrorCode ierr; 4444 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4445 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4446 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4447 MatScalar *aa=a->a,*ba=b->a,*cam; 4448 PetscScalar *ca; 4449 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4450 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4451 4452 PetscFunctionBegin; 4453 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4454 if (scall == MAT_INITIAL_MATRIX){ 4455 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4456 ci[0] = 0; 4457 for (i=0; i<am; i++){ 4458 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4459 } 4460 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4461 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4462 k = 0; 4463 for (i=0; i<am; i++) { 4464 ncols_o = bi[i+1] - bi[i]; 4465 ncols_d = ai[i+1] - ai[i]; 4466 /* off-diagonal portion of A */ 4467 for (jo=0; jo<ncols_o; jo++) { 4468 col = cmap[*bj]; 4469 if (col >= cstart) break; 4470 cj[k] = col; bj++; 4471 ca[k++] = *ba++; 4472 } 4473 /* diagonal portion of A */ 4474 for (j=0; j<ncols_d; j++) { 4475 cj[k] = cstart + *aj++; 4476 ca[k++] = *aa++; 4477 } 4478 /* off-diagonal portion of A */ 4479 for (j=jo; j<ncols_o; j++) { 4480 cj[k] = cmap[*bj++]; 4481 ca[k++] = *ba++; 4482 } 
4483 } 4484 /* put together the new matrix */ 4485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4486 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4487 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4488 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4489 mat->free_a = PETSC_TRUE; 4490 mat->free_ij = PETSC_TRUE; 4491 mat->nonew = 0; 4492 } else if (scall == MAT_REUSE_MATRIX){ 4493 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4494 ci = mat->i; cj = mat->j; cam = mat->a; 4495 for (i=0; i<am; i++) { 4496 /* off-diagonal portion of A */ 4497 ncols_o = bi[i+1] - bi[i]; 4498 for (jo=0; jo<ncols_o; jo++) { 4499 col = cmap[*bj]; 4500 if (col >= cstart) break; 4501 *cam++ = *ba++; bj++; 4502 } 4503 /* diagonal portion of A */ 4504 ncols_d = ai[i+1] - ai[i]; 4505 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4506 /* off-diagonal portion of A */ 4507 for (j=jo; j<ncols_o; j++) { 4508 *cam++ = *ba++; bj++; 4509 } 4510 } 4511 } else { 4512 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4513 } 4514 4515 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 4519 #undef __FUNCT__ 4520 #define __FUNCT__ "MatGetLocalMatCondensed" 4521 /*@C 4522 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4523 4524 Not Collective 4525 4526 Input Parameters: 4527 + A - the matrix 4528 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4529 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4530 4531 Output Parameter: 4532 . 
A_loc - the local sequential matrix generated 4533 4534 Level: developer 4535 4536 @*/ 4537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4538 { 4539 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4540 PetscErrorCode ierr; 4541 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4542 IS isrowa,iscola; 4543 Mat *aloc; 4544 4545 PetscFunctionBegin; 4546 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4547 if (!row){ 4548 start = A->rmap->rstart; end = A->rmap->rend; 4549 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4550 } else { 4551 isrowa = *row; 4552 } 4553 if (!col){ 4554 start = A->cmap->rstart; 4555 cmap = a->garray; 4556 nzA = a->A->cmap->n; 4557 nzB = a->B->cmap->n; 4558 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4559 ncols = 0; 4560 for (i=0; i<nzB; i++) { 4561 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4562 else break; 4563 } 4564 imark = i; 4565 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4566 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4567 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4568 ierr = PetscFree(idx);CHKERRQ(ierr); 4569 } else { 4570 iscola = *col; 4571 } 4572 if (scall != MAT_INITIAL_MATRIX){ 4573 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4574 aloc[0] = *A_loc; 4575 } 4576 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4577 *A_loc = aloc[0]; 4578 ierr = PetscFree(aloc);CHKERRQ(ierr); 4579 if (!row){ 4580 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4581 } 4582 if (!col){ 4583 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4584 } 4585 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #undef __FUNCT__ 4590 #define __FUNCT__ "MatGetBrowsOfAcols" 4591 /*@C 4592 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4593 4594 
Collective on Mat 4595 4596 Input Parameters: 4597 + A,B - the matrices in mpiaij format 4598 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4599 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4600 4601 Output Parameter: 4602 + rowb, colb - index sets of rows and columns of B to extract 4603 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4604 - B_seq - the sequential matrix generated 4605 4606 Level: developer 4607 4608 @*/ 4609 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4610 { 4611 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4612 PetscErrorCode ierr; 4613 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4614 IS isrowb,iscolb; 4615 Mat *bseq; 4616 4617 PetscFunctionBegin; 4618 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4619 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4620 } 4621 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4622 4623 if (scall == MAT_INITIAL_MATRIX){ 4624 start = A->cmap->rstart; 4625 cmap = a->garray; 4626 nzA = a->A->cmap->n; 4627 nzB = a->B->cmap->n; 4628 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4629 ncols = 0; 4630 for (i=0; i<nzB; i++) { /* row < local row index */ 4631 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4632 else break; 4633 } 4634 imark = i; 4635 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4636 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4637 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4638 ierr = PetscFree(idx);CHKERRQ(ierr); 4639 *brstart = imark; 4640 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4641 } else { 4642 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb 
and colb must be provided for MAT_REUSE_MATRIX"); 4643 isrowb = *rowb; iscolb = *colb; 4644 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4645 bseq[0] = *B_seq; 4646 } 4647 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4648 *B_seq = bseq[0]; 4649 ierr = PetscFree(bseq);CHKERRQ(ierr); 4650 if (!rowb){ 4651 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4652 } else { 4653 *rowb = isrowb; 4654 } 4655 if (!colb){ 4656 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4657 } else { 4658 *colb = iscolb; 4659 } 4660 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4661 PetscFunctionReturn(0); 4662 } 4663 4664 #undef __FUNCT__ 4665 #define __FUNCT__ "MatGetBrowsOfAoCols" 4666 /*@C 4667 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4668 of the OFF-DIAGONAL portion of local A 4669 4670 Collective on Mat 4671 4672 Input Parameters: 4673 + A,B - the matrices in mpiaij format 4674 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4675 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4676 . 
startsj_r - similar to startsj for receives 4677 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4678 4679 Output Parameter: 4680 + B_oth - the sequential matrix generated 4681 4682 Level: developer 4683 4684 @*/ 4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4686 { 4687 VecScatter_MPI_General *gen_to,*gen_from; 4688 PetscErrorCode ierr; 4689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4690 Mat_SeqAIJ *b_oth; 4691 VecScatter ctx=a->Mvctx; 4692 MPI_Comm comm=((PetscObject)ctx)->comm; 4693 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4694 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4695 PetscScalar *rvalues,*svalues; 4696 MatScalar *b_otha,*bufa,*bufA; 4697 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4698 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4699 MPI_Status *sstatus,rstatus; 4700 PetscMPIInt jj; 4701 PetscInt *cols,sbs,rbs; 4702 PetscScalar *vals; 4703 4704 PetscFunctionBegin; 4705 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4706 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4707 } 4708 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4710 4711 gen_to = (VecScatter_MPI_General*)ctx->todata; 4712 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4713 rvalues = gen_from->values; /* holds the length of receiving row */ 4714 svalues = gen_to->values; /* holds the length of sending row */ 4715 nrecvs = gen_from->n; 4716 nsends = gen_to->n; 4717 4718 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4719 srow = gen_to->indices; /* local row index to be sent */ 4720 sstarts = 
gen_to->starts; 4721 sprocs = gen_to->procs; 4722 sstatus = gen_to->sstatus; 4723 sbs = gen_to->bs; 4724 rstarts = gen_from->starts; 4725 rprocs = gen_from->procs; 4726 rbs = gen_from->bs; 4727 4728 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4729 if (scall == MAT_INITIAL_MATRIX){ 4730 /* i-array */ 4731 /*---------*/ 4732 /* post receives */ 4733 for (i=0; i<nrecvs; i++){ 4734 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4735 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4736 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4737 } 4738 4739 /* pack the outgoing message */ 4740 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4741 sstartsj[0] = 0; rstartsj[0] = 0; 4742 len = 0; /* total length of j or a array to be sent */ 4743 k = 0; 4744 for (i=0; i<nsends; i++){ 4745 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4746 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4747 for (j=0; j<nrows; j++) { 4748 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4749 for (l=0; l<sbs; l++){ 4750 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4751 rowlen[j*sbs+l] = ncols; 4752 len += ncols; 4753 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4754 } 4755 k++; 4756 } 4757 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4758 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4759 } 4760 /* recvs and sends of i-array are completed */ 4761 i = nrecvs; 4762 while (i--) { 4763 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4764 } 4765 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4766 4767 /* allocate buffers for sending j and a arrays */ 4768 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4769 ierr = 
PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4770 4771 /* create i-array of B_oth */ 4772 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4773 b_othi[0] = 0; 4774 len = 0; /* total length of j or a array to be received */ 4775 k = 0; 4776 for (i=0; i<nrecvs; i++){ 4777 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4778 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4779 for (j=0; j<nrows; j++) { 4780 b_othi[k+1] = b_othi[k] + rowlen[j]; 4781 len += rowlen[j]; k++; 4782 } 4783 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4784 } 4785 4786 /* allocate space for j and a arrrays of B_oth */ 4787 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4788 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4789 4790 /* j-array */ 4791 /*---------*/ 4792 /* post receives of j-array */ 4793 for (i=0; i<nrecvs; i++){ 4794 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4795 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4796 } 4797 4798 /* pack the outgoing message j-array */ 4799 k = 0; 4800 for (i=0; i<nsends; i++){ 4801 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4802 bufJ = bufj+sstartsj[i]; 4803 for (j=0; j<nrows; j++) { 4804 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4805 for (ll=0; ll<sbs; ll++){ 4806 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4807 for (l=0; l<ncols; l++){ 4808 *bufJ++ = cols[l]; 4809 } 4810 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4811 } 4812 } 4813 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4814 } 4815 4816 /* recvs and sends of j-array are completed */ 4817 i = nrecvs; 4818 while (i--) { 4819 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4820 } 4821 if 
(nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4822 } else if (scall == MAT_REUSE_MATRIX){ 4823 sstartsj = *startsj; 4824 rstartsj = *startsj_r; 4825 bufa = *bufa_ptr; 4826 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4827 b_otha = b_oth->a; 4828 } else { 4829 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4830 } 4831 4832 /* a-array */ 4833 /*---------*/ 4834 /* post receives of a-array */ 4835 for (i=0; i<nrecvs; i++){ 4836 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4837 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4838 } 4839 4840 /* pack the outgoing message a-array */ 4841 k = 0; 4842 for (i=0; i<nsends; i++){ 4843 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4844 bufA = bufa+sstartsj[i]; 4845 for (j=0; j<nrows; j++) { 4846 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4847 for (ll=0; ll<sbs; ll++){ 4848 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4849 for (l=0; l<ncols; l++){ 4850 *bufA++ = vals[l]; 4851 } 4852 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4853 } 4854 } 4855 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4856 } 4857 /* recvs and sends of a-array are completed */ 4858 i = nrecvs; 4859 while (i--) { 4860 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4861 } 4862 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4863 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4864 4865 if (scall == MAT_INITIAL_MATRIX){ 4866 /* put together the new matrix */ 4867 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4868 4869 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4870 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4871 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4872 b_oth->free_a = PETSC_TRUE; 4873 b_oth->free_ij = PETSC_TRUE; 4874 b_oth->nonew = 0; 4875 4876 ierr = PetscFree(bufj);CHKERRQ(ierr); 4877 if (!startsj || !bufa_ptr){ 4878 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4879 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4880 } else { 4881 *startsj = sstartsj; 4882 *startsj_r = rstartsj; 4883 *bufa_ptr = bufa; 4884 } 4885 } 4886 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatGetCommunicationStructs" 4892 /*@C 4893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4894 4895 Not Collective 4896 4897 Input Parameters: 4898 . A - The matrix in mpiaij format 4899 4900 Output Parameter: 4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4902 . colmap - A map from global column index to local index into lvec 4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4904 4905 Level: developer 4906 4907 @*/ 4908 #if defined (PETSC_USE_CTABLE) 4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4910 #else 4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4912 #endif 4913 { 4914 Mat_MPIAIJ *a; 4915 4916 PetscFunctionBegin; 4917 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4918 PetscValidPointer(lvec, 2) 4919 PetscValidPointer(colmap, 3) 4920 PetscValidPointer(multScatter, 4) 4921 a = (Mat_MPIAIJ *) A->data; 4922 if (lvec) *lvec = a->lvec; 4923 if (colmap) *colmap = a->colmap; 4924 if (multScatter) *multScatter = a->Mvctx; 4925 PetscFunctionReturn(0); 4926 } 4927 4928 EXTERN_C_BEGIN 4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const 
MatType,MatReuse,Mat*); 4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4932 EXTERN_C_END 4933 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4936 /* 4937 Computes (B'*A')' since computing B*A directly is untenable 4938 4939 n p p 4940 ( ) ( ) ( ) 4941 m ( A ) * n ( B ) = m ( C ) 4942 ( ) ( ) ( ) 4943 4944 */ 4945 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4946 { 4947 PetscErrorCode ierr; 4948 Mat At,Bt,Ct; 4949 4950 PetscFunctionBegin; 4951 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4952 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4953 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4954 ierr = MatDestroy(At);CHKERRQ(ierr); 4955 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4956 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4957 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 #undef __FUNCT__ 4962 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4963 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4964 { 4965 PetscErrorCode ierr; 4966 PetscInt m=A->rmap->n,n=B->cmap->n; 4967 Mat Cmat; 4968 4969 PetscFunctionBegin; 4970 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4971 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4972 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4973 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4974 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4975 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4976 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4977 *C = Cmat; 4978 PetscFunctionReturn(0); 4979 } 4980 4981 /* 
----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/*
   Driver for C = A*B with A MPIDense and B MPIAIJ: on MAT_INITIAL_MATRIX the
   product matrix is first created by the symbolic phase, then (in all cases)
   the numerical product is computed into *C.
*/
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
/* factorization hooks provided by optional external packages; each is
   compiled in only when PETSc was configured with that package */
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5016 5017 Level: beginner 5018 5019 .seealso: MatCreateMPIAIJ() 5020 M*/ 5021 5022 EXTERN_C_BEGIN 5023 #undef __FUNCT__ 5024 #define __FUNCT__ "MatCreate_MPIAIJ" 5025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5026 { 5027 Mat_MPIAIJ *b; 5028 PetscErrorCode ierr; 5029 PetscMPIInt size; 5030 5031 PetscFunctionBegin; 5032 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5033 5034 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5035 B->data = (void*)b; 5036 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5037 B->rmap->bs = 1; 5038 B->assembled = PETSC_FALSE; 5039 B->mapping = 0; 5040 5041 B->insertmode = NOT_SET_VALUES; 5042 b->size = size; 5043 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5044 5045 /* build cache for off array entries formed */ 5046 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5047 b->donotstash = PETSC_FALSE; 5048 b->colmap = 0; 5049 b->garray = 0; 5050 b->roworiented = PETSC_TRUE; 5051 5052 /* stuff used for matrix vector multiply */ 5053 b->lvec = PETSC_NULL; 5054 b->Mvctx = PETSC_NULL; 5055 5056 /* stuff for MatGetRow() */ 5057 b->rowindices = 0; 5058 b->rowvalues = 0; 5059 b->getrowactive = PETSC_FALSE; 5060 5061 #if defined(PETSC_HAVE_SPOOLES) 5062 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5063 "MatGetFactor_mpiaij_spooles", 5064 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5065 #endif 5066 #if defined(PETSC_HAVE_MUMPS) 5067 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5068 "MatGetFactor_mpiaij_mumps", 5069 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5070 #endif 5071 #if defined(PETSC_HAVE_PASTIX) 5072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5073 "MatGetFactor_mpiaij_pastix", 5074 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 
5075 #endif 5076 #if defined(PETSC_HAVE_SUPERLU_DIST) 5077 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5078 "MatGetFactor_mpiaij_superlu_dist", 5079 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5080 #endif 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5082 "MatStoreValues_MPIAIJ", 5083 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5085 "MatRetrieveValues_MPIAIJ", 5086 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5088 "MatGetDiagonalBlock_MPIAIJ", 5089 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5091 "MatIsTranspose_MPIAIJ", 5092 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5094 "MatMPIAIJSetPreallocation_MPIAIJ", 5095 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5097 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5098 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5100 "MatDiagonalScaleLocal_MPIAIJ", 5101 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5103 "MatConvert_MPIAIJ_MPICSRPERM", 5104 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5106 "MatConvert_MPIAIJ_MPICRL", 5107 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5109 "MatConvert_MPIAIJ_MPISBAIJ", 5110 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5111 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5112 "MatMatMult_MPIDense_MPIAIJ", 5113 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5115 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5116 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5118 "MatMatMultNumeric_MPIDense_MPIAIJ", 5119 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5120 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5121 PetscFunctionReturn(0); 5122 } 5123 EXTERN_C_END 5124 5125 #undef __FUNCT__ 5126 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5127 /*@ 5128 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5129 and "off-diagonal" part of the matrix in CSR format. 5130 5131 Collective on MPI_Comm 5132 5133 Input Parameters: 5134 + comm - MPI communicator 5135 . m - number of local rows (Cannot be PETSC_DECIDE) 5136 . n - This value should be the same as the local size used in creating the 5137 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5138 calculated if N is given) For square matrices n is almost always m. 5139 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5140 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5141 . i - row indices for "diagonal" portion of matrix 5142 . j - column indices 5143 . a - matrix values 5144 . oi - row indices for "off-diagonal" portion of matrix 5145 . oj - column indices 5146 - oa - matrix values 5147 5148 Output Parameter: 5149 . mat - the matrix 5150 5151 Level: advanced 5152 5153 Notes: 5154 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
5155 5156 The i and j indices are 0 based 5157 5158 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5159 5160 This sets local rows and cannot be used to set off-processor values. 5161 5162 You cannot later use MatSetValues() to change values in this matrix. 5163 5164 .keywords: matrix, aij, compressed row, sparse, parallel 5165 5166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5167 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5168 @*/ 5169 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5170 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5171 { 5172 PetscErrorCode ierr; 5173 Mat_MPIAIJ *maij; 5174 5175 PetscFunctionBegin; 5176 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5177 if (i[0]) { 5178 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5179 } 5180 if (oi[0]) { 5181 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5182 } 5183 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5184 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5185 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5186 maij = (Mat_MPIAIJ*) (*mat)->data; 5187 maij->donotstash = PETSC_TRUE; 5188 (*mat)->preallocated = PETSC_TRUE; 5189 5190 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5191 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5192 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5193 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5194 5195 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5196 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5197 5198 ierr = 
MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5199 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5200 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5201 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5202 5203 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5204 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 PetscFunctionReturn(0); 5206 } 5207 5208 /* 5209 Special version for direct calls from Fortran 5210 */ 5211 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5212 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5213 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5214 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5215 #endif 5216 5217 /* Change these macros so can be used in void function */ 5218 #undef CHKERRQ 5219 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5220 #undef SETERRQ2 5221 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5222 #undef SETERRQ 5223 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5224 5225 EXTERN_C_BEGIN 5226 #undef __FUNCT__ 5227 #define __FUNCT__ "matsetvaluesmpiaij_" 5228 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5229 { 5230 Mat mat = *mmat; 5231 PetscInt m = *mm, n = *mn; 5232 InsertMode addv = *maddv; 5233 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5234 PetscScalar value; 5235 PetscErrorCode ierr; 5236 5237 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5238 if (mat->insertmode == NOT_SET_VALUES) { 5239 mat->insertmode = addv; 5240 } 5241 #if defined(PETSC_USE_DEBUG) 5242 else if (mat->insertmode != addv) { 5243 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5244 } 5245 #endif 5246 { 5247 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5248 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 5249 PetscTruth roworiented = aij->roworiented; 5250 5251 /* Some Variables required in the macro */ 5252 Mat A = aij->A; 5253 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5254 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5255 MatScalar *aa = a->a; 5256 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5257 Mat B = aij->B; 5258 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5259 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5260 MatScalar *ba = b->a; 5261 5262 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5263 PetscInt nonew = a->nonew; 5264 MatScalar *ap1,*ap2; 5265 5266 PetscFunctionBegin; 5267 for (i=0; i<m; i++) { 5268 if (im[i] < 0) continue; 5269 #if defined(PETSC_USE_DEBUG) 5270 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5271 #endif 5272 if (im[i] >= rstart && im[i] < rend) { 5273 row = im[i] - rstart; 5274 lastcol1 = -1; 5275 rp1 = aj + ai[row]; 5276 ap1 = aa + ai[row]; 5277 rmax1 = aimax[row]; 5278 nrow1 = ailen[row]; 5279 low1 = 0; 5280 high1 = nrow1; 5281 lastcol2 = -1; 5282 rp2 = bj + bi[row]; 5283 ap2 = ba + bi[row]; 5284 rmax2 = bimax[row]; 5285 nrow2 = bilen[row]; 5286 low2 = 0; 5287 high2 = nrow2; 5288 5289 for (j=0; j<n; j++) { 5290 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5291 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5292 if (in[j] >= cstart && in[j] < cend){ 5293 col = in[j] - cstart; 5294 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5295 } else if (in[j] < 0) continue; 5296 #if defined(PETSC_USE_DEBUG) 5297 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5298 #endif 5299 else { 5300 if (mat->was_assembled) { 5301 if (!aij->colmap) { 5302 ierr = 
CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5303 } 5304 #if defined (PETSC_USE_CTABLE) 5305 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5306 col--; 5307 #else 5308 col = aij->colmap[in[j]] - 1; 5309 #endif 5310 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5311 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5312 col = in[j]; 5313 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5314 B = aij->B; 5315 b = (Mat_SeqAIJ*)B->data; 5316 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5317 rp2 = bj + bi[row]; 5318 ap2 = ba + bi[row]; 5319 rmax2 = bimax[row]; 5320 nrow2 = bilen[row]; 5321 low2 = 0; 5322 high2 = nrow2; 5323 bm = aij->B->rmap->n; 5324 ba = b->a; 5325 } 5326 } else col = in[j]; 5327 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5328 } 5329 } 5330 } else { 5331 if (!aij->donotstash) { 5332 if (roworiented) { 5333 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5334 } else { 5335 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5336 } 5337 } 5338 } 5339 }} 5340 PetscFunctionReturnVoid(); 5341 } 5342 EXTERN_C_END 5343 5344