1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatDistribute_MPIAIJ" 7 /* 8 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10 11 Only for square matrices 12 */ 13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14 { 15 PetscMPIInt rank,size; 16 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17 PetscErrorCode ierr; 18 Mat mat; 19 Mat_SeqAIJ *gmata; 20 PetscMPIInt tag; 21 MPI_Status status; 22 PetscTruth aij; 23 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24 25 PetscFunctionBegin; 26 CHKMEMQ; 27 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29 if (!rank) { 30 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32 } 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40 rowners[0] = 0; 41 for (i=2; i<=size; i++) { 42 rowners[i] += rowners[i-1]; 43 } 44 rstart = rowners[rank]; 45 rend = rowners[rank+1]; 46 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47 if (!rank) { 48 gmata = (Mat_SeqAIJ*) gmat->data; 49 /* send row lengths to all processors */ 50 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51 for (i=1; i<size; i++) { 52 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53 } 54 /* determine number diagonal and off-diagonal counts */ 55 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58 jj = 0; 59 for (i=0; i<m; i++) { 60 for (j=0; j<dlens[i]; j++) { 61 if (gmata->j[jj] < rstart) ld[i]++; 62 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63 jj++; 64 } 65 } 66 /* send column indices to other processes */ 67 for (i=1; i<size; i++) { 68 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 } 72 73 /* send numerical values to other processes */ 74 for (i=1; i<size; i++) { 75 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77 } 78 gmataa = gmata->a; 79 gmataj = gmata->j; 80 81 } else { 82 /* receive row lengths */ 83 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84 /* receive column indices */ 85 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88 /* determine number diagonal and off-diagonal counts */ 89 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if (mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the local 178 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 179 a slightly higher hash table cost; without it it is not scalable (each processor 180 has an order N integer array but is fast to acess. 181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 187 PetscErrorCode ierr; 188 PetscInt n = aij->B->cmap->n,i; 189 190 PetscFunctionBegin; 191 #if defined (PETSC_USE_CTABLE) 192 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 193 for (i=0; i<n; i++){ 194 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 195 } 196 #else 197 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 201 #endif 202 PetscFunctionReturn(0); 203 } 204 205 206 #define CHUNKSIZE 15 207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 208 { \ 209 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 210 lastcol1 = col;\ 211 while (high1-low1 > 5) { \ 212 t = (low1+high1)/2; \ 213 if (rp1[t] > col) high1 = t; \ 214 else low1 = t; \ 215 } \ 216 for (_i=low1; _i<high1; _i++) { \ 217 if (rp1[_i] > col) break; \ 218 if (rp1[_i] == col) { \ 219 if (addv == ADD_VALUES) ap1[_i] += value; \ 220 else ap1[_i] = value; \ 221 goto a_noinsert; \ 222 } \ 223 } \ 224 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 225 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 226 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 227 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 228 N = nrow1++ - 1; a->nz++; high1++; \ 229 /* shift up all the later entries in this row */ \ 230 for (ii=N; ii>=_i; ii--) { \ 231 rp1[ii+1] = rp1[ii]; \ 232 ap1[ii+1] = ap1[ii]; \ 233 } \ 234 rp1[_i] = col; \ 235 ap1[_i] = value; \ 236 a_noinsert: ; \ 237 ailen[row] = nrow1; \ 238 } 239 240 241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 242 { \ 243 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 244 lastcol2 = col;\ 245 while (high2-low2 > 5) { \ 246 t = (low2+high2)/2; \ 247 if (rp2[t] > col) high2 = t; \ 248 else low2 = t; \ 249 } \ 250 for (_i=low2; _i<high2; _i++) { \ 251 if (rp2[_i] > col) break; \ 252 if (rp2[_i] == col) { \ 253 if (addv == ADD_VALUES) ap2[_i] += value; \ 254 else ap2[_i] = value; \ 255 goto b_noinsert; \ 256 } \ 257 } \ 258 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 261 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 262 N = nrow2++ - 1; b->nz++; high2++; \ 263 /* shift up all the later entries in this row */ \ 264 for (ii=N; ii>=_i; ii--) { \ 265 rp2[ii+1] = rp2[ii]; \ 266 ap2[ii+1] = ap2[ii]; \ 267 } \ 268 rp2[_i] = col; \ 269 ap2[_i] = value; \ 270 b_noinsert: ; \ 271 bilen[row] = nrow2; \ 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 277 { 278 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (!aij->donotstash) { 394 if (roworiented) { 395 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 396 } else { 397 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } 399 } 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatGetValues_MPIAIJ" 407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 408 { 409 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 412 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 413 414 PetscFunctionBegin; 415 for (i=0; i<m; i++) { 416 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 417 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 418 if (idxm[i] >= rstart && idxm[i] < rend) { 419 row = idxm[i] - rstart; 420 for (j=0; j<n; j++) { 421 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 422 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 423 if (idxn[j] >= cstart && idxn[j] < cend){ 424 col = idxn[j] - cstart; 425 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 426 } else { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 #if defined (PETSC_USE_CTABLE) 431 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 432 col --; 433 #else 434 col = aij->colmap[idxn[j]] - 1; 435 #endif 436 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 437 else { 438 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 439 } 440 } 441 } 442 } else { 443 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscErrorCode ierr; 457 PetscInt nstash,reallocs; 458 InsertMode addv; 459 460 PetscFunctionBegin; 461 if (aij->donotstash) { 462 PetscFunctionReturn(0); 463 } 464 465 /* make sure all processors are either in INSERTMODE or ADDMODE */ 466 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 467 if (addv == (ADD_VALUES|INSERT_VALUES)) { 468 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 469 } 470 mat->insertmode = addv; /* in case this processor had no cache */ 471 472 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 473 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 474 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 483 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 484 PetscErrorCode ierr; 485 PetscMPIInt n; 486 PetscInt i,j,rstart,ncols,flg; 487 PetscInt *row,*col; 488 PetscTruth other_disassembled; 489 PetscScalar *val; 490 InsertMode addv = mat->insertmode; 491 492 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 493 PetscFunctionBegin; 494 if (!aij->donotstash) { 495 while (1) { 496 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 497 if (!flg) break; 498 499 for (i=0; i<n;) { 500 /* Now identify the consecutive vals belonging to the same row */ 501 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 502 if (j < n) ncols = j-i; 503 else ncols = n-i; 504 /* Now assemble all these values with a single function call */ 505 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 506 i = j; 507 } 508 } 509 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 510 } 511 a->compressedrow.use = PETSC_FALSE; 512 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 513 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 514 515 /* determine if any processor has disassembled, if so we must 516 also disassemble ourselfs, in order that we may reassemble. */ 517 /* 518 if nonzero structure of submatrix B cannot change then we know that 519 no processor disassembled thus we can skip this stuff 520 */ 521 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 522 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 523 if (mat->was_assembled && !other_disassembled) { 524 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525 } 526 } 527 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 528 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 529 } 530 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 531 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 532 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 534 535 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 536 aij->rowvalues = 0; 537 538 /* used by MatAXPY() */ 539 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 540 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 541 542 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 543 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 550 { 551 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 556 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatZeroRows_MPIAIJ" 562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 563 { 564 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 567 PetscInt i,*owners = A->rmap->range; 568 PetscInt *nprocs,j,idx,nsends,row; 569 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 570 PetscInt *rvalues,count,base,slen,*source; 571 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 572 MPI_Comm comm = ((PetscObject)A)->comm; 573 MPI_Request *send_waits,*recv_waits; 574 MPI_Status recv_status,*send_status; 575 #if defined(PETSC_DEBUG) 576 PetscTruth found = PETSC_FALSE; 577 #endif 578 579 PetscFunctionBegin; 580 /* first count number of contributors to each processor */ 581 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 582 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 583 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 584 j = 0; 585 for (i=0; i<N; i++) { 586 if (lastidx > (idx = rows[i])) j = 0; 587 lastidx = idx; 588 for (; j<size; j++) { 589 if (idx >= owners[j] && idx < owners[j+1]) { 590 nprocs[2*j]++; 591 nprocs[2*j+1] = 1; 592 owner[i] = j; 593 #if defined(PETSC_DEBUG) 594 found = PETSC_TRUE; 595 #endif 596 break; 597 } 598 } 599 #if defined(PETSC_DEBUG) 600 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 601 found = PETSC_FALSE; 602 #endif 603 } 604 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 605 606 /* inform other processors of number of messages and max length*/ 607 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 608 609 /* post receives: */ 610 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 611 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 612 for (i=0; i<nrecvs; i++) { 613 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 614 } 615 616 /* do sends: 617 1) starts[i] gives the starting index in svalues for stuff going to 618 the ith processor 619 */ 620 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 621 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 622 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 623 starts[0] = 0; 624 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 625 for (i=0; i<N; i++) { 626 svalues[starts[owner[i]]++] = rows[i]; 627 } 628 629 starts[0] = 0; 630 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 631 count = 0; 632 for (i=0; i<size; i++) { 633 if (nprocs[2*i+1]) { 634 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscFree(starts);CHKERRQ(ierr); 638 639 base = owners[rank]; 640 641 /* wait on receives */ 642 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 643 source = lens + nrecvs; 644 count = nrecvs; slen = 0; 645 while (count) { 646 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 647 /* unpack receives into our local space */ 648 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 649 source[imdex] = recv_status.MPI_SOURCE; 650 lens[imdex] = n; 651 slen += n; 652 count--; 653 } 654 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 655 656 /* move the data into the send scatter */ 657 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 658 count = 0; 659 for (i=0; i<nrecvs; i++) { 660 values = rvalues + i*nmax; 661 for (j=0; j<lens[i]; j++) { 662 lrows[count++] = values[j] - base; 663 } 664 } 665 ierr = PetscFree(rvalues);CHKERRQ(ierr); 666 ierr = PetscFree(lens);CHKERRQ(ierr); 667 ierr = PetscFree(owner);CHKERRQ(ierr); 668 ierr = PetscFree(nprocs);CHKERRQ(ierr); 669 670 /* actually zap the local rows */ 671 /* 672 Zero the required rows. If the "diagonal block" of the matrix 673 is square and the user wishes to set the diagonal we use separate 674 code so that MatSetValues() is not called for each diagonal allocating 675 new memory, thus calling lots of mallocs and slowing things down. 676 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMultAdd_MPIAIJ" 745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 746 { 747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 752 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 753 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 761 { 762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 763 PetscErrorCode ierr; 764 PetscTruth merged; 765 766 PetscFunctionBegin; 767 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 768 /* do nondiagonal part */ 769 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 770 if (!merged) { 771 /* send it on its way */ 772 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 /* do local part */ 774 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 775 /* receive remote parts: note this assumes the values are not actually */ 776 /* added in yy until the next line, */ 777 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 } else { 779 /* do local part */ 780 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 781 /* send it on its way */ 782 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783 /* values actually were received in the Begin() but we need to call this nop */ 784 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 } 786 PetscFunctionReturn(0); 787 } 788 789 EXTERN_C_BEGIN 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 793 { 794 MPI_Comm comm; 795 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 796 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 797 IS Me,Notme; 798 PetscErrorCode ierr; 799 PetscInt M,N,first,last,*notme,i; 800 PetscMPIInt size; 801 802 PetscFunctionBegin; 803 804 /* Easy test: symmetric diagonal block */ 805 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 806 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 807 if (!*f) PetscFunctionReturn(0); 808 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 810 if (size == 1) PetscFunctionReturn(0); 811 812 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 813 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 814 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 815 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 816 for (i=0; i<first; i++) notme[i] = i; 817 for (i=last; i<M; i++) notme[i-last+first] = i; 818 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 819 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 820 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 821 Aoff = Aoffs[0]; 822 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 823 Boff = Boffs[0]; 824 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 825 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 826 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 827 ierr = ISDestroy(Me);CHKERRQ(ierr); 828 ierr = ISDestroy(Notme);CHKERRQ(ierr); 829 830 PetscFunctionReturn(0); 831 } 832 EXTERN_C_END 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 837 { 838 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 /* do nondiagonal part */ 843 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 844 /* send it on its way */ 845 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 846 /* do local part */ 847 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 848 /* receive remote parts */ 849 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 /* 854 This only works correctly for square matrices where the subblock A->A is the 855 diagonal block 856 */ 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 860 { 861 PetscErrorCode ierr; 862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 863 864 PetscFunctionBegin; 865 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 866 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 867 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 868 } 869 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatScale_MPIAIJ" 875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 876 { 877 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 882 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatDestroy_MPIAIJ" 888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 889 { 890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 891 PetscErrorCode ierr; 892 893 PetscFunctionBegin; 894 #if defined(PETSC_USE_LOG) 895 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 896 #endif 897 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 898 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 899 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 900 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 901 #if defined (PETSC_USE_CTABLE) 902 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 903 #else 904 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 905 #endif 906 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 907 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 908 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 909 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 910 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 911 ierr = PetscFree(aij);CHKERRQ(ierr); 912 913 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "MatView_MPIAIJ_Binary" 926 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 927 { 928 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 929 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 930 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 931 PetscErrorCode ierr; 932 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 933 int fd; 934 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 935 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 936 PetscScalar *column_values; 937 938 PetscFunctionBegin; 939 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 940 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 941 nz = A->nz + B->nz; 942 if (!rank) { 943 header[0] = MAT_FILE_COOKIE; 944 header[1] = mat->rmap->N; 945 header[2] = mat->cmap->N; 946 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 947 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 948 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 949 /* get largest number of rows any processor has */ 950 rlen = mat->rmap->n; 951 range = mat->rmap->range; 952 for (i=1; i<size; i++) { 953 rlen = PetscMax(rlen,range[i+1] - range[i]); 954 } 955 } else { 956 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 957 rlen = mat->rmap->n; 958 } 959 960 /* load up the local row counts */ 961 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 962 for (i=0; i<mat->rmap->n; i++) { 963 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 964 } 965 966 /* store the row lengths to the file */ 967 if (!rank) { 968 MPI_Status status; 969 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 970 for (i=1; i<size; i++) { 971 rlen = range[i+1] - range[i]; 972 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 973 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 974 } 975 } else { 976 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 977 } 978 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 979 980 /* load up the local column indices */ 981 nzmax = nz; /* )th processor needs space a largest processor needs */ 982 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 983 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 984 cnt = 0; 985 for (i=0; i<mat->rmap->n; i++) { 986 for (j=B->i[i]; j<B->i[i+1]; j++) { 987 if ( (col = garray[B->j[j]]) > cstart) break; 988 column_indices[cnt++] = col; 989 } 990 for (k=A->i[i]; k<A->i[i+1]; k++) { 991 column_indices[cnt++] = A->j[k] + cstart; 992 } 993 for (; j<B->i[i+1]; j++) { 994 column_indices[cnt++] = garray[B->j[j]]; 995 } 996 } 997 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 998 999 /* store the column indices to the file */ 1000 if (!rank) { 1001 MPI_Status status; 1002 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1003 for (i=1; i<size; i++) { 1004 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1005 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1006 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1007 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1008 } 1009 } else { 1010 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1011 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1014 1015 /* load up the local column values */ 1016 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1017 cnt = 0; 1018 for (i=0; i<mat->rmap->n; i++) { 1019 for (j=B->i[i]; j<B->i[i+1]; j++) { 1020 if ( garray[B->j[j]] > cstart) break; 1021 column_values[cnt++] = B->a[j]; 1022 } 1023 for (k=A->i[i]; k<A->i[i+1]; k++) { 1024 column_values[cnt++] = A->a[k]; 1025 } 1026 for (; j<B->i[i+1]; j++) { 1027 column_values[cnt++] = B->a[j]; 1028 } 1029 } 1030 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1031 1032 /* store the column values to the file */ 1033 if (!rank) { 1034 MPI_Status status; 1035 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1036 for (i=1; i<size; i++) { 1037 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1038 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1039 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1040 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1041 } 1042 } else { 1043 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1044 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1045 } 1046 ierr = PetscFree(column_values);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1052 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1053 { 1054 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1055 PetscErrorCode ierr; 1056 PetscMPIInt rank = aij->rank,size = aij->size; 1057 PetscTruth isdraw,iascii,isbinary; 1058 PetscViewer sviewer; 1059 PetscViewerFormat format; 1060 1061 PetscFunctionBegin; 1062 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1063 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1065 if (iascii) { 1066 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1067 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1068 MatInfo info; 1069 PetscTruth inodes; 1070 1071 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1072 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1073 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1074 if (!inodes) { 1075 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1076 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1077 } else { 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1079 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1080 } 1081 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1083 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1085 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1086 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1087 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1090 PetscInt inodecount,inodelimit,*inodes; 1091 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1092 if (inodes) { 1093 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1094 } else { 1095 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1096 } 1097 PetscFunctionReturn(0); 1098 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1099 PetscFunctionReturn(0); 1100 } 1101 } else if (isbinary) { 1102 if (size == 1) { 1103 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1104 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1105 } else { 1106 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1107 } 1108 PetscFunctionReturn(0); 1109 } else if (isdraw) { 1110 PetscDraw draw; 1111 PetscTruth isnull; 1112 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1113 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1114 } 1115 1116 if (size == 1) { 1117 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1118 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1119 } else { 1120 /* assemble the entire matrix onto first processor. */ 1121 Mat A; 1122 Mat_SeqAIJ *Aloc; 1123 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1124 MatScalar *a; 1125 1126 if (mat->rmap->N > 1024) { 1127 PetscTruth flg = PETSC_FALSE; 1128 1129 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1130 if (!flg) { 1131 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1132 } 1133 } 1134 1135 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1136 if (!rank) { 1137 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1138 } else { 1139 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1140 } 1141 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1142 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1143 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1144 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1145 1146 /* copy over the A part */ 1147 Aloc = (Mat_SeqAIJ*)aij->A->data; 1148 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1149 row = mat->rmap->rstart; 1150 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1151 for (i=0; i<m; i++) { 1152 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1153 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1154 } 1155 aj = Aloc->j; 1156 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1157 1158 /* copy over the B part */ 1159 Aloc = (Mat_SeqAIJ*)aij->B->data; 1160 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1161 row = mat->rmap->rstart; 1162 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1163 ct = cols; 1164 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1165 for (i=0; i<m; i++) { 1166 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1167 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1168 } 1169 ierr = PetscFree(ct);CHKERRQ(ierr); 1170 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1171 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 /* 1173 Everyone has to call to draw the matrix since the graphics waits are 1174 synchronized across all processors that share the PetscDraw object 1175 */ 1176 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1177 if (!rank) { 1178 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1179 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1180 } 1181 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1182 ierr = MatDestroy(A);CHKERRQ(ierr); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatView_MPIAIJ" 1189 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1190 { 1191 PetscErrorCode ierr; 1192 PetscTruth iascii,isdraw,issocket,isbinary; 1193 1194 PetscFunctionBegin; 1195 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1199 if (iascii || isdraw || isbinary || issocket) { 1200 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1201 } else { 1202 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatRelax_MPIAIJ" 1209 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1210 { 1211 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1212 PetscErrorCode ierr; 1213 Vec bb1 = 0; 1214 PetscTruth hasop; 1215 1216 PetscFunctionBegin; 1217 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1218 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1219 } 1220 1221 if (flag == SOR_APPLY_UPPER) { 1222 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1227 if (flag & SOR_ZERO_INITIAL_GUESS) { 1228 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1229 its--; 1230 } 1231 1232 while (its--) { 1233 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 1236 /* update rhs: bb1 = bb - B*x */ 1237 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1238 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1239 1240 /* local sweep */ 1241 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1242 } 1243 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1244 if (flag & SOR_ZERO_INITIAL_GUESS) { 1245 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1246 its--; 1247 } 1248 while (its--) { 1249 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 1252 /* update rhs: bb1 = bb - B*x */ 1253 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1254 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1255 1256 /* local sweep */ 1257 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1258 } 1259 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1260 if (flag & SOR_ZERO_INITIAL_GUESS) { 1261 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1262 its--; 1263 } 1264 while (its--) { 1265 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 1268 /* update rhs: bb1 = bb - B*x */ 1269 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1270 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1271 1272 /* local sweep */ 1273 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1274 } 1275 } else if (flag & SOR_EISENSTAT) { 1276 Vec xx1; 1277 1278 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1279 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1280 1281 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 if (!mat->diag) { 1284 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1285 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1286 } 1287 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1288 if (hasop) { 1289 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1290 } else { 1291 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1292 } 1293 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1294 1295 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1296 1297 /* local sweep */ 1298 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1299 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1300 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1301 } else { 1302 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1303 } 1304 1305 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatPermute_MPIAIJ" 1311 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1312 { 1313 MPI_Comm comm,pcomm; 1314 PetscInt first,local_size,nrows; 1315 const PetscInt *rows; 1316 PetscMPIInt size; 1317 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1322 /* make a collective version of 'rowp' */ 1323 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1324 if (pcomm==comm) { 1325 crowp = rowp; 1326 } else { 1327 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1328 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1329 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1330 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1331 } 1332 /* collect the global row permutation and invert it */ 1333 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1334 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1335 if (pcomm!=comm) { 1336 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1337 } 1338 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1339 /* get the local target indices */ 1340 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1341 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1342 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1343 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1344 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1345 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1346 /* the column permutation is so much easier; 1347 make a local version of 'colp' and invert it */ 1348 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1349 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1350 if (size==1) { 1351 lcolp = colp; 1352 } else { 1353 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1354 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1355 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1356 } 1357 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1358 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1359 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1360 if (size>1) { 1361 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1362 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1363 } 1364 /* now we just get the submatrix */ 1365 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1366 /* clean up */ 1367 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1368 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1374 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1375 { 1376 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1377 Mat A = mat->A,B = mat->B; 1378 PetscErrorCode ierr; 1379 PetscReal isend[5],irecv[5]; 1380 1381 PetscFunctionBegin; 1382 info->block_size = 1.0; 1383 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1384 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1385 isend[3] = info->memory; isend[4] = info->mallocs; 1386 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1387 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1388 isend[3] += info->memory; isend[4] += info->mallocs; 1389 if (flag == MAT_LOCAL) { 1390 info->nz_used = isend[0]; 1391 info->nz_allocated = isend[1]; 1392 info->nz_unneeded = isend[2]; 1393 info->memory = isend[3]; 1394 info->mallocs = isend[4]; 1395 } else if (flag == MAT_GLOBAL_MAX) { 1396 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1397 info->nz_used = irecv[0]; 1398 info->nz_allocated = irecv[1]; 1399 info->nz_unneeded = irecv[2]; 1400 info->memory = irecv[3]; 1401 info->mallocs = irecv[4]; 1402 } else if (flag == MAT_GLOBAL_SUM) { 1403 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1404 info->nz_used = irecv[0]; 1405 info->nz_allocated = irecv[1]; 1406 info->nz_unneeded = irecv[2]; 1407 info->memory = irecv[3]; 1408 info->mallocs = irecv[4]; 1409 } 1410 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1411 info->fill_ratio_needed = 0; 1412 info->factor_mallocs = 0; 1413 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatSetOption_MPIAIJ" 1419 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1420 { 1421 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1422 PetscErrorCode ierr; 1423 1424 PetscFunctionBegin; 1425 switch (op) { 1426 case MAT_NEW_NONZERO_LOCATIONS: 1427 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1428 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1429 case MAT_KEEP_NONZERO_PATTERN: 1430 case MAT_NEW_NONZERO_LOCATION_ERR: 1431 case MAT_USE_INODES: 1432 case MAT_IGNORE_ZERO_ENTRIES: 1433 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1434 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1435 break; 1436 case MAT_ROW_ORIENTED: 1437 a->roworiented = flg; 1438 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1439 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1440 break; 1441 case MAT_NEW_DIAGONALS: 1442 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1443 break; 1444 case MAT_IGNORE_OFF_PROC_ENTRIES: 1445 a->donotstash = PETSC_TRUE; 1446 break; 1447 case MAT_SYMMETRIC: 1448 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1449 break; 1450 case MAT_STRUCTURALLY_SYMMETRIC: 1451 case MAT_HERMITIAN: 1452 case MAT_SYMMETRY_ETERNAL: 1453 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1454 break; 1455 default: 1456 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatGetRow_MPIAIJ" 1463 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1464 { 1465 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1466 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1467 PetscErrorCode ierr; 1468 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1469 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1470 PetscInt *cmap,*idx_p; 1471 1472 PetscFunctionBegin; 1473 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1474 mat->getrowactive = PETSC_TRUE; 1475 1476 if (!mat->rowvalues && (idx || v)) { 1477 /* 1478 allocate enough space to hold information from the longest row. 1479 */ 1480 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1481 PetscInt max = 1,tmp; 1482 for (i=0; i<matin->rmap->n; i++) { 1483 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1484 if (max < tmp) { max = tmp; } 1485 } 1486 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1487 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1488 } 1489 1490 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1491 lrow = row - rstart; 1492 1493 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1494 if (!v) {pvA = 0; pvB = 0;} 1495 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1496 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1497 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1498 nztot = nzA + nzB; 1499 1500 cmap = mat->garray; 1501 if (v || idx) { 1502 if (nztot) { 1503 /* Sort by increasing column numbers, assuming A and B already sorted */ 1504 PetscInt imark = -1; 1505 if (v) { 1506 *v = v_p = mat->rowvalues; 1507 for (i=0; i<nzB; i++) { 1508 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1509 else break; 1510 } 1511 imark = i; 1512 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1513 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1514 } 1515 if (idx) { 1516 *idx = idx_p = mat->rowindices; 1517 if (imark > -1) { 1518 for (i=0; i<imark; i++) { 1519 idx_p[i] = cmap[cworkB[i]]; 1520 } 1521 } else { 1522 for (i=0; i<nzB; i++) { 1523 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1524 else break; 1525 } 1526 imark = i; 1527 } 1528 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1529 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1530 } 1531 } else { 1532 if (idx) *idx = 0; 1533 if (v) *v = 0; 1534 } 1535 } 1536 *nz = nztot; 1537 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1538 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1544 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1545 { 1546 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1547 1548 PetscFunctionBegin; 1549 if (!aij->getrowactive) { 1550 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1551 } 1552 aij->getrowactive = PETSC_FALSE; 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatNorm_MPIAIJ" 1558 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1559 { 1560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1561 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1562 PetscErrorCode ierr; 1563 PetscInt i,j,cstart = mat->cmap->rstart; 1564 PetscReal sum = 0.0; 1565 MatScalar *v; 1566 1567 PetscFunctionBegin; 1568 if (aij->size == 1) { 1569 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1570 } else { 1571 if (type == NORM_FROBENIUS) { 1572 v = amat->a; 1573 for (i=0; i<amat->nz; i++) { 1574 #if defined(PETSC_USE_COMPLEX) 1575 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1576 #else 1577 sum += (*v)*(*v); v++; 1578 #endif 1579 } 1580 v = bmat->a; 1581 for (i=0; i<bmat->nz; i++) { 1582 #if defined(PETSC_USE_COMPLEX) 1583 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1584 #else 1585 sum += (*v)*(*v); v++; 1586 #endif 1587 } 1588 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1589 *norm = sqrt(*norm); 1590 } else if (type == NORM_1) { /* max column norm */ 1591 PetscReal *tmp,*tmp2; 1592 PetscInt *jj,*garray = aij->garray; 1593 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1594 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1595 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1596 *norm = 0.0; 1597 v = amat->a; jj = amat->j; 1598 for (j=0; j<amat->nz; j++) { 1599 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1600 } 1601 v = bmat->a; jj = bmat->j; 1602 for (j=0; j<bmat->nz; j++) { 1603 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1604 } 1605 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1606 for (j=0; j<mat->cmap->N; j++) { 1607 if (tmp2[j] > *norm) *norm = tmp2[j]; 1608 } 1609 ierr = PetscFree(tmp);CHKERRQ(ierr); 1610 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1611 } else if (type == NORM_INFINITY) { /* max row norm */ 1612 PetscReal ntemp = 0.0; 1613 for (j=0; j<aij->A->rmap->n; j++) { 1614 v = amat->a + amat->i[j]; 1615 sum = 0.0; 1616 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1617 sum += PetscAbsScalar(*v); v++; 1618 } 1619 v = bmat->a + bmat->i[j]; 1620 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1621 sum += PetscAbsScalar(*v); v++; 1622 } 1623 if (sum > ntemp) ntemp = sum; 1624 } 1625 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1626 } else { 1627 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1628 } 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatTranspose_MPIAIJ" 1635 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1636 { 1637 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1638 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1639 PetscErrorCode ierr; 1640 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1641 PetscInt cstart=A->cmap->rstart,ncol; 1642 Mat B; 1643 MatScalar *array; 1644 1645 PetscFunctionBegin; 1646 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1647 1648 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1649 ai = Aloc->i; aj = Aloc->j; 1650 bi = Bloc->i; bj = Bloc->j; 1651 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1652 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1653 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1654 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1655 for (i=0; i<ai[ma]; i++){ 1656 d_nnz[aj[i]] ++; 1657 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1658 } 1659 1660 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1661 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1662 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1663 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1664 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1665 } else { 1666 B = *matout; 1667 } 1668 1669 /* copy over the A part */ 1670 array = Aloc->a; 1671 row = A->rmap->rstart; 1672 for (i=0; i<ma; i++) { 1673 ncol = ai[i+1]-ai[i]; 1674 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1675 row++; array += ncol; aj += ncol; 1676 } 1677 aj = Aloc->j; 1678 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1679 1680 /* copy over the B part */ 1681 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1682 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1683 array = Bloc->a; 1684 row = A->rmap->rstart; 1685 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1686 cols_tmp = cols; 1687 for (i=0; i<mb; i++) { 1688 ncol = bi[i+1]-bi[i]; 1689 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1690 row++; array += ncol; cols_tmp += ncol; 1691 } 1692 ierr = PetscFree(cols);CHKERRQ(ierr); 1693 1694 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1695 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1696 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1697 *matout = B; 1698 } else { 1699 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1706 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1707 { 1708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1709 Mat a = aij->A,b = aij->B; 1710 PetscErrorCode ierr; 1711 PetscInt s1,s2,s3; 1712 1713 PetscFunctionBegin; 1714 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1715 if (rr) { 1716 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1717 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1718 /* Overlap communication with computation. */ 1719 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1720 } 1721 if (ll) { 1722 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1723 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1724 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1725 } 1726 /* scale the diagonal block */ 1727 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1728 1729 if (rr) { 1730 /* Do a scatter end and then right scale the off-diagonal block */ 1731 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1732 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1733 } 1734 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1740 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1741 { 1742 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1747 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1752 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1753 { 1754 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatEqual_MPIAIJ" 1764 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1765 { 1766 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1767 Mat a,b,c,d; 1768 PetscTruth flg; 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 a = matA->A; b = matA->B; 1773 c = matB->A; d = matB->B; 1774 1775 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1776 if (flg) { 1777 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1778 } 1779 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatCopy_MPIAIJ" 1785 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1786 { 1787 PetscErrorCode ierr; 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1789 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1790 1791 PetscFunctionBegin; 1792 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1793 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1794 /* because of the column compression in the off-processor part of the matrix a->B, 1795 the number of columns in a->B and b->B may be different, hence we cannot call 1796 the MatCopy() directly on the two parts. If need be, we can provide a more 1797 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1798 then copying the submatrices */ 1799 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1800 } else { 1801 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1802 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1809 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1810 { 1811 PetscErrorCode ierr; 1812 1813 PetscFunctionBegin; 1814 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1815 PetscFunctionReturn(0); 1816 } 1817 1818 #include "petscblaslapack.h" 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatAXPY_MPIAIJ" 1821 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1822 { 1823 PetscErrorCode ierr; 1824 PetscInt i; 1825 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1826 PetscBLASInt bnz,one=1; 1827 Mat_SeqAIJ *x,*y; 1828 1829 PetscFunctionBegin; 1830 if (str == SAME_NONZERO_PATTERN) { 1831 PetscScalar alpha = a; 1832 x = (Mat_SeqAIJ *)xx->A->data; 1833 y = (Mat_SeqAIJ *)yy->A->data; 1834 bnz = PetscBLASIntCast(x->nz); 1835 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1836 x = (Mat_SeqAIJ *)xx->B->data; 1837 y = (Mat_SeqAIJ *)yy->B->data; 1838 bnz = PetscBLASIntCast(x->nz); 1839 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1840 } else if (str == SUBSET_NONZERO_PATTERN) { 1841 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1842 1843 x = (Mat_SeqAIJ *)xx->B->data; 1844 y = (Mat_SeqAIJ *)yy->B->data; 1845 if (y->xtoy && y->XtoY != xx->B) { 1846 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1847 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1848 } 1849 if (!y->xtoy) { /* get xtoy */ 1850 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1851 y->XtoY = xx->B; 1852 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1853 } 1854 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1855 } else { 1856 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatConjugate_MPIAIJ" 1865 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1866 { 1867 #if defined(PETSC_USE_COMPLEX) 1868 PetscErrorCode ierr; 1869 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1870 1871 PetscFunctionBegin; 1872 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1873 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1874 #else 1875 PetscFunctionBegin; 1876 #endif 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatRealPart_MPIAIJ" 1882 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1883 { 1884 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1889 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1895 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1896 { 1897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1902 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #ifdef PETSC_HAVE_PBGL 1907 1908 #include <boost/parallel/mpi/bsp_process_group.hpp> 1909 #include <boost/graph/distributed/ilu_default_graph.hpp> 1910 #include <boost/graph/distributed/ilu_0_block.hpp> 1911 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1912 #include <boost/graph/distributed/petsc/interface.hpp> 1913 #include <boost/multi_array.hpp> 1914 #include <boost/parallel/distributed_property_map->hpp> 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1918 /* 1919 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1920 */ 1921 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1922 { 1923 namespace petsc = boost::distributed::petsc; 1924 1925 namespace graph_dist = boost::graph::distributed; 1926 using boost::graph::distributed::ilu_default::process_group_type; 1927 using boost::graph::ilu_permuted; 1928 1929 PetscTruth row_identity, col_identity; 1930 PetscContainer c; 1931 PetscInt m, n, M, N; 1932 PetscErrorCode ierr; 1933 1934 PetscFunctionBegin; 1935 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1936 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1937 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1938 if (!row_identity || !col_identity) { 1939 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1940 } 1941 1942 process_group_type pg; 1943 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1944 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1945 lgraph_type& level_graph = *lgraph_p; 1946 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1947 1948 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1949 ilu_permuted(level_graph); 1950 1951 /* put together the new matrix */ 1952 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1953 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1954 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1955 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1956 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1957 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1959 1960 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1961 ierr = PetscContainerSetPointer(c, lgraph_p); 1962 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1968 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1969 { 1970 PetscFunctionBegin; 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatSolve_MPIAIJ" 1976 /* 1977 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1978 */ 1979 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1980 { 1981 namespace graph_dist = boost::graph::distributed; 1982 1983 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1984 lgraph_type* lgraph_p; 1985 PetscContainer c; 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1990 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1991 ierr = VecCopy(b, x);CHKERRQ(ierr); 1992 1993 PetscScalar* array_x; 1994 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1995 PetscInt sx; 1996 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1997 1998 PetscScalar* array_b; 1999 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2000 PetscInt sb; 2001 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2002 2003 lgraph_type& level_graph = *lgraph_p; 2004 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2005 2006 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2007 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2008 ref_x(array_x, boost::extents[num_vertices(graph)]); 2009 2010 typedef boost::iterator_property_map<array_ref_type::iterator, 2011 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2012 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2013 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2014 2015 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2016 2017 PetscFunctionReturn(0); 2018 } 2019 #endif 2020 2021 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2022 PetscInt nzlocal,nsends,nrecvs; 2023 PetscMPIInt *send_rank; 2024 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2025 PetscScalar *sbuf_a,**rbuf_a; 2026 PetscErrorCode (*MatDestroy)(Mat); 2027 } Mat_Redundant; 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2031 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2032 { 2033 PetscErrorCode ierr; 2034 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2035 PetscInt i; 2036 2037 PetscFunctionBegin; 2038 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2040 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2041 for (i=0; i<redund->nrecvs; i++){ 2042 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2043 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2044 } 2045 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2046 ierr = PetscFree(redund);CHKERRQ(ierr); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "MatDestroy_MatRedundant" 2052 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2053 { 2054 PetscErrorCode ierr; 2055 PetscContainer container; 2056 Mat_Redundant *redund=PETSC_NULL; 2057 2058 PetscFunctionBegin; 2059 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2060 if (container) { 2061 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2062 } else { 2063 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2064 } 2065 A->ops->destroy = redund->MatDestroy; 2066 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2067 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2068 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2074 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2075 { 2076 PetscMPIInt rank,size; 2077 MPI_Comm comm=((PetscObject)mat)->comm; 2078 PetscErrorCode ierr; 2079 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2080 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2081 PetscInt *rowrange=mat->rmap->range; 2082 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2083 Mat A=aij->A,B=aij->B,C=*matredundant; 2084 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2085 PetscScalar *sbuf_a; 2086 PetscInt nzlocal=a->nz+b->nz; 2087 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2088 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2089 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2090 MatScalar *aworkA,*aworkB; 2091 PetscScalar *vals; 2092 PetscMPIInt tag1,tag2,tag3,imdex; 2093 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2094 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2095 MPI_Status recv_status,*send_status; 2096 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2097 PetscInt **rbuf_j=PETSC_NULL; 2098 PetscScalar **rbuf_a=PETSC_NULL; 2099 Mat_Redundant *redund=PETSC_NULL; 2100 PetscContainer container; 2101 2102 PetscFunctionBegin; 2103 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 2106 if (reuse == MAT_REUSE_MATRIX) { 2107 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2108 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2109 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2110 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2111 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2112 if (container) { 2113 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2114 } else { 2115 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2116 } 2117 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2118 2119 nsends = redund->nsends; 2120 nrecvs = redund->nrecvs; 2121 send_rank = redund->send_rank; recv_rank = send_rank + size; 2122 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2123 sbuf_j = redund->sbuf_j; 2124 sbuf_a = redund->sbuf_a; 2125 rbuf_j = redund->rbuf_j; 2126 rbuf_a = redund->rbuf_a; 2127 } 2128 2129 if (reuse == MAT_INITIAL_MATRIX){ 2130 PetscMPIInt subrank,subsize; 2131 PetscInt nleftover,np_subcomm; 2132 /* get the destination processors' id send_rank, nsends and nrecvs */ 2133 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2134 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2135 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2136 recv_rank = send_rank + size; 2137 np_subcomm = size/nsubcomm; 2138 nleftover = size - nsubcomm*np_subcomm; 2139 nsends = 0; nrecvs = 0; 2140 for (i=0; i<size; i++){ /* i=rank*/ 2141 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2142 send_rank[nsends] = i; nsends++; 2143 recv_rank[nrecvs++] = i; 2144 } 2145 } 2146 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2147 i = size-nleftover-1; 2148 j = 0; 2149 while (j < nsubcomm - nleftover){ 2150 send_rank[nsends++] = i; 2151 i--; j++; 2152 } 2153 } 2154 2155 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2156 for (i=0; i<nleftover; i++){ 2157 recv_rank[nrecvs++] = size-nleftover+i; 2158 } 2159 } 2160 2161 /* allocate sbuf_j, sbuf_a */ 2162 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2163 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2164 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2165 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2166 2167 /* copy mat's local entries into the buffers */ 2168 if (reuse == MAT_INITIAL_MATRIX){ 2169 rownz_max = 0; 2170 rptr = sbuf_j; 2171 cols = sbuf_j + rend-rstart + 1; 2172 vals = sbuf_a; 2173 rptr[0] = 0; 2174 for (i=0; i<rend-rstart; i++){ 2175 row = i + rstart; 2176 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2177 ncols = nzA + nzB; 2178 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2179 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2180 /* load the column indices for this row into cols */ 2181 lwrite = 0; 2182 for (l=0; l<nzB; l++) { 2183 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2184 vals[lwrite] = aworkB[l]; 2185 cols[lwrite++] = ctmp; 2186 } 2187 } 2188 for (l=0; l<nzA; l++){ 2189 vals[lwrite] = aworkA[l]; 2190 cols[lwrite++] = cstart + cworkA[l]; 2191 } 2192 for (l=0; l<nzB; l++) { 2193 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2194 vals[lwrite] = aworkB[l]; 2195 cols[lwrite++] = ctmp; 2196 } 2197 } 2198 vals += ncols; 2199 cols += ncols; 2200 rptr[i+1] = rptr[i] + ncols; 2201 if (rownz_max < ncols) rownz_max = ncols; 2202 } 2203 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2204 } else { /* only copy matrix values into sbuf_a */ 2205 rptr = sbuf_j; 2206 vals = sbuf_a; 2207 rptr[0] = 0; 2208 for (i=0; i<rend-rstart; i++){ 2209 row = i + rstart; 2210 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2211 ncols = nzA + nzB; 2212 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2213 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2214 lwrite = 0; 2215 for (l=0; l<nzB; l++) { 2216 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2217 } 2218 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2219 for (l=0; l<nzB; l++) { 2220 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2221 } 2222 vals += ncols; 2223 rptr[i+1] = rptr[i] + ncols; 2224 } 2225 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2226 2227 /* send nzlocal to others, and recv other's nzlocal */ 2228 /*--------------------------------------------------*/ 2229 if (reuse == MAT_INITIAL_MATRIX){ 2230 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2231 s_waits2 = s_waits3 + nsends; 2232 s_waits1 = s_waits2 + nsends; 2233 r_waits1 = s_waits1 + nsends; 2234 r_waits2 = r_waits1 + nrecvs; 2235 r_waits3 = r_waits2 + nrecvs; 2236 } else { 2237 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 r_waits3 = s_waits3 + nsends; 2239 } 2240 2241 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2242 if (reuse == MAT_INITIAL_MATRIX){ 2243 /* get new tags to keep the communication clean */ 2244 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2245 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2246 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2247 rbuf_nz = sbuf_nz + nsends; 2248 2249 /* post receives of other's nzlocal */ 2250 for (i=0; i<nrecvs; i++){ 2251 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2252 } 2253 /* send nzlocal to others */ 2254 for (i=0; i<nsends; i++){ 2255 sbuf_nz[i] = nzlocal; 2256 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2257 } 2258 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2259 count = nrecvs; 2260 while (count) { 2261 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2262 recv_rank[imdex] = recv_status.MPI_SOURCE; 2263 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2264 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2265 2266 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2267 rbuf_nz[imdex] += i + 2; 2268 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2269 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2270 count--; 2271 } 2272 /* wait on sends of nzlocal */ 2273 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2274 /* send mat->i,j to others, and recv from other's */ 2275 /*------------------------------------------------*/ 2276 for (i=0; i<nsends; i++){ 2277 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2278 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2279 } 2280 /* wait on receives of mat->i,j */ 2281 /*------------------------------*/ 2282 count = nrecvs; 2283 while (count) { 2284 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2285 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2286 count--; 2287 } 2288 /* wait on sends of mat->i,j */ 2289 /*---------------------------*/ 2290 if (nsends) { 2291 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2292 } 2293 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2294 2295 /* post receives, send and receive mat->a */ 2296 /*----------------------------------------*/ 2297 for (imdex=0; imdex<nrecvs; imdex++) { 2298 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2299 } 2300 for (i=0; i<nsends; i++){ 2301 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2302 } 2303 count = nrecvs; 2304 while (count) { 2305 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2306 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2307 count--; 2308 } 2309 if (nsends) { 2310 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2311 } 2312 2313 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2314 2315 /* create redundant matrix */ 2316 /*-------------------------*/ 2317 if (reuse == MAT_INITIAL_MATRIX){ 2318 /* compute rownz_max for preallocation */ 2319 for (imdex=0; imdex<nrecvs; imdex++){ 2320 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2321 rptr = rbuf_j[imdex]; 2322 for (i=0; i<j; i++){ 2323 ncols = rptr[i+1] - rptr[i]; 2324 if (rownz_max < ncols) rownz_max = ncols; 2325 } 2326 } 2327 2328 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2329 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2330 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2331 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2332 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2333 } else { 2334 C = *matredundant; 2335 } 2336 2337 /* insert local matrix entries */ 2338 rptr = sbuf_j; 2339 cols = sbuf_j + rend-rstart + 1; 2340 vals = sbuf_a; 2341 for (i=0; i<rend-rstart; i++){ 2342 row = i + rstart; 2343 ncols = rptr[i+1] - rptr[i]; 2344 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2345 vals += ncols; 2346 cols += ncols; 2347 } 2348 /* insert received matrix entries */ 2349 for (imdex=0; imdex<nrecvs; imdex++){ 2350 rstart = rowrange[recv_rank[imdex]]; 2351 rend = rowrange[recv_rank[imdex]+1]; 2352 rptr = rbuf_j[imdex]; 2353 cols = rbuf_j[imdex] + rend-rstart + 1; 2354 vals = rbuf_a[imdex]; 2355 for (i=0; i<rend-rstart; i++){ 2356 row = i + rstart; 2357 ncols = rptr[i+1] - rptr[i]; 2358 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2359 vals += ncols; 2360 cols += ncols; 2361 } 2362 } 2363 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2364 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2365 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2366 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2367 if (reuse == MAT_INITIAL_MATRIX){ 2368 PetscContainer container; 2369 *matredundant = C; 2370 /* create a supporting struct and attach it to C for reuse */ 2371 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2372 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2373 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2374 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2375 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2376 2377 redund->nzlocal = nzlocal; 2378 redund->nsends = nsends; 2379 redund->nrecvs = nrecvs; 2380 redund->send_rank = send_rank; 2381 redund->sbuf_nz = sbuf_nz; 2382 redund->sbuf_j = sbuf_j; 2383 redund->sbuf_a = sbuf_a; 2384 redund->rbuf_j = rbuf_j; 2385 redund->rbuf_a = rbuf_a; 2386 2387 redund->MatDestroy = C->ops->destroy; 2388 C->ops->destroy = MatDestroy_MatRedundant; 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2395 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2396 { 2397 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2398 PetscErrorCode ierr; 2399 PetscInt i,*idxb = 0; 2400 PetscScalar *va,*vb; 2401 Vec vtmp; 2402 2403 PetscFunctionBegin; 2404 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2405 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2406 if (idx) { 2407 for (i=0; i<A->rmap->n; i++) { 2408 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2409 } 2410 } 2411 2412 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2413 if (idx) { 2414 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2415 } 2416 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2417 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2418 2419 for (i=0; i<A->rmap->n; i++){ 2420 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2421 va[i] = vb[i]; 2422 if (idx) idx[i] = a->garray[idxb[i]]; 2423 } 2424 } 2425 2426 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2427 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2428 if (idxb) { 2429 ierr = PetscFree(idxb);CHKERRQ(ierr); 2430 } 2431 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2437 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2438 { 2439 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2440 PetscErrorCode ierr; 2441 PetscInt i,*idxb = 0; 2442 PetscScalar *va,*vb; 2443 Vec vtmp; 2444 2445 PetscFunctionBegin; 2446 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2447 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2448 if (idx) { 2449 for (i=0; i<A->cmap->n; i++) { 2450 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2451 } 2452 } 2453 2454 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2455 if (idx) { 2456 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2457 } 2458 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2459 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2460 2461 for (i=0; i<A->rmap->n; i++){ 2462 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2463 va[i] = vb[i]; 2464 if (idx) idx[i] = a->garray[idxb[i]]; 2465 } 2466 } 2467 2468 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2469 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2470 if (idxb) { 2471 ierr = PetscFree(idxb);CHKERRQ(ierr); 2472 } 2473 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2479 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2480 { 2481 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2482 PetscInt n = A->rmap->n; 2483 PetscInt cstart = A->cmap->rstart; 2484 PetscInt *cmap = mat->garray; 2485 PetscInt *diagIdx, *offdiagIdx; 2486 Vec diagV, offdiagV; 2487 PetscScalar *a, *diagA, *offdiagA; 2488 PetscInt r; 2489 PetscErrorCode ierr; 2490 2491 PetscFunctionBegin; 2492 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2493 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2494 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2495 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2496 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2497 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2498 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2499 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2500 for(r = 0; r < n; ++r) { 2501 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2502 a[r] = diagA[r]; 2503 idx[r] = cstart + diagIdx[r]; 2504 } else { 2505 a[r] = offdiagA[r]; 2506 idx[r] = cmap[offdiagIdx[r]]; 2507 } 2508 } 2509 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2510 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2511 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2512 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2513 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2514 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2520 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2521 { 2522 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2523 PetscInt n = A->rmap->n; 2524 PetscInt cstart = A->cmap->rstart; 2525 PetscInt *cmap = mat->garray; 2526 PetscInt *diagIdx, *offdiagIdx; 2527 Vec diagV, offdiagV; 2528 PetscScalar *a, *diagA, *offdiagA; 2529 PetscInt r; 2530 PetscErrorCode ierr; 2531 2532 PetscFunctionBegin; 2533 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2534 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2535 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2536 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2537 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2538 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2539 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2540 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2541 for(r = 0; r < n; ++r) { 2542 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2543 a[r] = diagA[r]; 2544 idx[r] = cstart + diagIdx[r]; 2545 } else { 2546 a[r] = offdiagA[r]; 2547 idx[r] = cmap[offdiagIdx[r]]; 2548 } 2549 } 2550 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2551 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2552 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2553 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2554 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2555 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2561 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2562 { 2563 PetscErrorCode ierr; 2564 Mat *dummy; 2565 2566 PetscFunctionBegin; 2567 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2568 *newmat = *dummy; 2569 ierr = PetscFree(dummy);CHKERRQ(ierr); 2570 PetscFunctionReturn(0); 2571 } 2572 2573 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2574 /* -------------------------------------------------------------------*/ 2575 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2576 MatGetRow_MPIAIJ, 2577 MatRestoreRow_MPIAIJ, 2578 MatMult_MPIAIJ, 2579 /* 4*/ MatMultAdd_MPIAIJ, 2580 MatMultTranspose_MPIAIJ, 2581 MatMultTransposeAdd_MPIAIJ, 2582 #ifdef PETSC_HAVE_PBGL 2583 MatSolve_MPIAIJ, 2584 #else 2585 0, 2586 #endif 2587 0, 2588 0, 2589 /*10*/ 0, 2590 0, 2591 0, 2592 MatRelax_MPIAIJ, 2593 MatTranspose_MPIAIJ, 2594 /*15*/ MatGetInfo_MPIAIJ, 2595 MatEqual_MPIAIJ, 2596 MatGetDiagonal_MPIAIJ, 2597 MatDiagonalScale_MPIAIJ, 2598 MatNorm_MPIAIJ, 2599 /*20*/ MatAssemblyBegin_MPIAIJ, 2600 MatAssemblyEnd_MPIAIJ, 2601 MatSetOption_MPIAIJ, 2602 MatZeroEntries_MPIAIJ, 2603 /*24*/ MatZeroRows_MPIAIJ, 2604 0, 2605 #ifdef PETSC_HAVE_PBGL 2606 0, 2607 #else 2608 0, 2609 #endif 2610 0, 2611 0, 2612 /*29*/ MatSetUpPreallocation_MPIAIJ, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 0, 2621 /*34*/ MatDuplicate_MPIAIJ, 2622 0, 2623 0, 2624 0, 2625 0, 2626 /*39*/ MatAXPY_MPIAIJ, 2627 MatGetSubMatrices_MPIAIJ, 2628 MatIncreaseOverlap_MPIAIJ, 2629 MatGetValues_MPIAIJ, 2630 MatCopy_MPIAIJ, 2631 /*44*/ MatGetRowMax_MPIAIJ, 2632 MatScale_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 /*49*/ MatSetBlockSize_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*54*/ MatFDColoringCreate_MPIAIJ, 2642 0, 2643 MatSetUnfactored_MPIAIJ, 2644 MatPermute_MPIAIJ, 2645 0, 2646 /*59*/ MatGetSubMatrix_MPIAIJ, 2647 MatDestroy_MPIAIJ, 2648 MatView_MPIAIJ, 2649 0, 2650 0, 2651 /*64*/ 0, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2657 MatGetRowMinAbs_MPIAIJ, 2658 0, 2659 MatSetColoring_MPIAIJ, 2660 #if defined(PETSC_HAVE_ADIC) 2661 MatSetValuesAdic_MPIAIJ, 2662 #else 2663 0, 2664 #endif 2665 MatSetValuesAdifor_MPIAIJ, 2666 /*75*/ MatFDColoringApply_AIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*80*/ 0, 2672 0, 2673 0, 2674 /*83*/ MatLoad_MPIAIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2681 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2682 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2683 MatPtAP_Basic, 2684 MatPtAPSymbolic_MPIAIJ, 2685 /*94*/ MatPtAPNumeric_MPIAIJ, 2686 0, 2687 0, 2688 0, 2689 0, 2690 /*99*/ 0, 2691 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2692 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2693 MatConjugate_MPIAIJ, 2694 0, 2695 /*104*/MatSetValuesRow_MPIAIJ, 2696 MatRealPart_MPIAIJ, 2697 MatImaginaryPart_MPIAIJ, 2698 0, 2699 0, 2700 /*109*/0, 2701 MatGetRedundantMatrix_MPIAIJ, 2702 MatGetRowMin_MPIAIJ, 2703 0, 2704 0, 2705 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2706 0, 2707 0, 2708 0, 2709 0, 2710 0 2711 }; 2712 2713 /* ----------------------------------------------------------------------------------------*/ 2714 2715 EXTERN_C_BEGIN 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2718 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2719 { 2720 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2725 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2726 PetscFunctionReturn(0); 2727 } 2728 EXTERN_C_END 2729 2730 EXTERN_C_BEGIN 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2733 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2734 { 2735 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2736 PetscErrorCode ierr; 2737 2738 PetscFunctionBegin; 2739 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2740 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 EXTERN_C_END 2744 2745 #include "petscpc.h" 2746 EXTERN_C_BEGIN 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2749 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2750 { 2751 Mat_MPIAIJ *b; 2752 PetscErrorCode ierr; 2753 PetscInt i; 2754 2755 PetscFunctionBegin; 2756 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2757 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2758 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2759 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2760 2761 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2762 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2763 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2764 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2765 if (d_nnz) { 2766 for (i=0; i<B->rmap->n; i++) { 2767 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2768 } 2769 } 2770 if (o_nnz) { 2771 for (i=0; i<B->rmap->n; i++) { 2772 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2773 } 2774 } 2775 b = (Mat_MPIAIJ*)B->data; 2776 2777 if (!B->preallocated) { 2778 /* Explicitly create 2 MATSEQAIJ matrices. */ 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2781 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2782 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2783 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2784 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2785 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2786 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2787 } 2788 2789 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2790 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2791 B->preallocated = PETSC_TRUE; 2792 PetscFunctionReturn(0); 2793 } 2794 EXTERN_C_END 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2798 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2799 { 2800 Mat mat; 2801 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2802 PetscErrorCode ierr; 2803 2804 PetscFunctionBegin; 2805 *newmat = 0; 2806 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2807 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2808 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2809 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2810 a = (Mat_MPIAIJ*)mat->data; 2811 2812 mat->factor = matin->factor; 2813 mat->rmap->bs = matin->rmap->bs; 2814 mat->assembled = PETSC_TRUE; 2815 mat->insertmode = NOT_SET_VALUES; 2816 mat->preallocated = PETSC_TRUE; 2817 2818 a->size = oldmat->size; 2819 a->rank = oldmat->rank; 2820 a->donotstash = oldmat->donotstash; 2821 a->roworiented = oldmat->roworiented; 2822 a->rowindices = 0; 2823 a->rowvalues = 0; 2824 a->getrowactive = PETSC_FALSE; 2825 2826 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2827 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2828 2829 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2830 if (oldmat->colmap) { 2831 #if defined (PETSC_USE_CTABLE) 2832 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2833 #else 2834 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2835 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2836 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2837 #endif 2838 } else a->colmap = 0; 2839 if (oldmat->garray) { 2840 PetscInt len; 2841 len = oldmat->B->cmap->n; 2842 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2843 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2844 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2845 } else a->garray = 0; 2846 2847 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2848 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2849 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2850 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2851 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2852 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2853 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2855 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2856 *newmat = mat; 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #include "petscsys.h" 2861 2862 #undef __FUNCT__ 2863 #define __FUNCT__ "MatLoad_MPIAIJ" 2864 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2865 { 2866 Mat A; 2867 PetscScalar *vals,*svals; 2868 MPI_Comm comm = ((PetscObject)viewer)->comm; 2869 MPI_Status status; 2870 PetscErrorCode ierr; 2871 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2872 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2873 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2874 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2875 PetscInt cend,cstart,n,*rowners; 2876 int fd; 2877 2878 PetscFunctionBegin; 2879 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2880 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2881 if (!rank) { 2882 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2883 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2884 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2885 } 2886 2887 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2888 M = header[1]; N = header[2]; 2889 /* determine ownership of all rows */ 2890 m = M/size + ((M % size) > rank); 2891 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2892 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2893 2894 /* First process needs enough room for process with most rows */ 2895 if (!rank) { 2896 mmax = rowners[1]; 2897 for (i=2; i<size; i++) { 2898 mmax = PetscMax(mmax,rowners[i]); 2899 } 2900 } else mmax = m; 2901 2902 rowners[0] = 0; 2903 for (i=2; i<=size; i++) { 2904 rowners[i] += rowners[i-1]; 2905 } 2906 rstart = rowners[rank]; 2907 rend = rowners[rank+1]; 2908 2909 /* distribute row lengths to all processors */ 2910 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2911 if (!rank) { 2912 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2913 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2914 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2915 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2916 for (j=0; j<m; j++) { 2917 procsnz[0] += ourlens[j]; 2918 } 2919 for (i=1; i<size; i++) { 2920 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2921 /* calculate the number of nonzeros on each processor */ 2922 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2923 procsnz[i] += rowlengths[j]; 2924 } 2925 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2926 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2927 } 2928 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2929 } else { 2930 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2931 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2932 } 2933 2934 if (!rank) { 2935 /* determine max buffer needed and allocate it */ 2936 maxnz = 0; 2937 for (i=0; i<size; i++) { 2938 maxnz = PetscMax(maxnz,procsnz[i]); 2939 } 2940 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2941 2942 /* read in my part of the matrix column indices */ 2943 nz = procsnz[0]; 2944 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2945 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2946 2947 /* read in every one elses and ship off */ 2948 for (i=1; i<size; i++) { 2949 nz = procsnz[i]; 2950 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2951 mpicnt = PetscMPIIntCast(nz); 2952 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2953 } 2954 ierr = PetscFree(cols);CHKERRQ(ierr); 2955 } else { 2956 /* determine buffer space needed for message */ 2957 nz = 0; 2958 for (i=0; i<m; i++) { 2959 nz += ourlens[i]; 2960 } 2961 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2962 2963 /* receive message of column indices*/ 2964 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2965 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2966 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2967 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2968 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2969 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2970 } 2971 2972 /* determine column ownership if matrix is not square */ 2973 if (N != M) { 2974 n = N/size + ((N % size) > rank); 2975 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2976 cstart = cend - n; 2977 } else { 2978 cstart = rstart; 2979 cend = rend; 2980 n = cend - cstart; 2981 } 2982 2983 /* loop over local rows, determining number of off diagonal entries */ 2984 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2985 jj = 0; 2986 for (i=0; i<m; i++) { 2987 for (j=0; j<ourlens[i]; j++) { 2988 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2989 jj++; 2990 } 2991 } 2992 2993 /* create our matrix */ 2994 for (i=0; i<m; i++) { 2995 ourlens[i] -= offlens[i]; 2996 } 2997 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2998 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2999 ierr = MatSetType(A,type);CHKERRQ(ierr); 3000 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3001 3002 for (i=0; i<m; i++) { 3003 ourlens[i] += offlens[i]; 3004 } 3005 3006 if (!rank) { 3007 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3008 3009 /* read in my part of the matrix numerical values */ 3010 nz = procsnz[0]; 3011 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3012 3013 /* insert into matrix */ 3014 jj = rstart; 3015 smycols = mycols; 3016 svals = vals; 3017 for (i=0; i<m; i++) { 3018 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3019 smycols += ourlens[i]; 3020 svals += ourlens[i]; 3021 jj++; 3022 } 3023 3024 /* read in other processors and ship out */ 3025 for (i=1; i<size; i++) { 3026 nz = procsnz[i]; 3027 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3028 mpicnt = PetscMPIIntCast(nz); 3029 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3030 } 3031 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3032 } else { 3033 /* receive numeric values */ 3034 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3035 3036 /* receive message of values*/ 3037 mpicnt = PetscMPIIntCast(nz); 3038 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3039 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3040 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3041 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3042 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3043 3044 /* insert into matrix */ 3045 jj = rstart; 3046 smycols = mycols; 3047 svals = vals; 3048 for (i=0; i<m; i++) { 3049 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3050 smycols += ourlens[i]; 3051 svals += ourlens[i]; 3052 jj++; 3053 } 3054 } 3055 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3056 ierr = PetscFree(vals);CHKERRQ(ierr); 3057 ierr = PetscFree(mycols);CHKERRQ(ierr); 3058 ierr = PetscFree(rowners);CHKERRQ(ierr); 3059 3060 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 *newmat = A; 3063 PetscFunctionReturn(0); 3064 } 3065 3066 #undef __FUNCT__ 3067 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3068 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3069 { 3070 PetscErrorCode ierr; 3071 IS iscol_local; 3072 PetscInt csize; 3073 3074 PetscFunctionBegin; 3075 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3076 if (call == MAT_REUSE_MATRIX) { 3077 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3078 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3079 } else { 3080 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3081 } 3082 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3083 if (call == MAT_INITIAL_MATRIX) { 3084 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3085 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3086 } 3087 PetscFunctionReturn(0); 3088 } 3089 3090 #undef __FUNCT__ 3091 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3092 /* 3093 Not great since it makes two copies of the submatrix, first an SeqAIJ 3094 in local and then by concatenating the local matrices the end result. 3095 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3096 3097 Note: This requires a sequential iscol with all indices. 3098 */ 3099 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3100 { 3101 PetscErrorCode ierr; 3102 PetscMPIInt rank,size; 3103 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3104 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3105 Mat *local,M,Mreuse; 3106 MatScalar *vwork,*aa; 3107 MPI_Comm comm = ((PetscObject)mat)->comm; 3108 Mat_SeqAIJ *aij; 3109 3110 3111 PetscFunctionBegin; 3112 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3113 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3114 3115 if (call == MAT_REUSE_MATRIX) { 3116 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3117 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3118 local = &Mreuse; 3119 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3120 } else { 3121 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3122 Mreuse = *local; 3123 ierr = PetscFree(local);CHKERRQ(ierr); 3124 } 3125 3126 /* 3127 m - number of local rows 3128 n - number of columns (same on all processors) 3129 rstart - first row in new global matrix generated 3130 */ 3131 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3132 if (call == MAT_INITIAL_MATRIX) { 3133 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3134 ii = aij->i; 3135 jj = aij->j; 3136 3137 /* 3138 Determine the number of non-zeros in the diagonal and off-diagonal 3139 portions of the matrix in order to do correct preallocation 3140 */ 3141 3142 /* first get start and end of "diagonal" columns */ 3143 if (csize == PETSC_DECIDE) { 3144 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3145 if (mglobal == n) { /* square matrix */ 3146 nlocal = m; 3147 } else { 3148 nlocal = n/size + ((n % size) > rank); 3149 } 3150 } else { 3151 nlocal = csize; 3152 } 3153 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3154 rstart = rend - nlocal; 3155 if (rank == size - 1 && rend != n) { 3156 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3157 } 3158 3159 /* next, compute all the lengths */ 3160 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3161 olens = dlens + m; 3162 for (i=0; i<m; i++) { 3163 jend = ii[i+1] - ii[i]; 3164 olen = 0; 3165 dlen = 0; 3166 for (j=0; j<jend; j++) { 3167 if (*jj < rstart || *jj >= rend) olen++; 3168 else dlen++; 3169 jj++; 3170 } 3171 olens[i] = olen; 3172 dlens[i] = dlen; 3173 } 3174 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3175 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3176 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3177 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3178 ierr = PetscFree(dlens);CHKERRQ(ierr); 3179 } else { 3180 PetscInt ml,nl; 3181 3182 M = *newmat; 3183 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3184 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3185 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3186 /* 3187 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3188 rather than the slower MatSetValues(). 3189 */ 3190 M->was_assembled = PETSC_TRUE; 3191 M->assembled = PETSC_FALSE; 3192 } 3193 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3194 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3195 ii = aij->i; 3196 jj = aij->j; 3197 aa = aij->a; 3198 for (i=0; i<m; i++) { 3199 row = rstart + i; 3200 nz = ii[i+1] - ii[i]; 3201 cwork = jj; jj += nz; 3202 vwork = aa; aa += nz; 3203 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3204 } 3205 3206 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3207 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3208 *newmat = M; 3209 3210 /* save submatrix used in processor for next request */ 3211 if (call == MAT_INITIAL_MATRIX) { 3212 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3213 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3214 } 3215 3216 PetscFunctionReturn(0); 3217 } 3218 3219 EXTERN_C_BEGIN 3220 #undef __FUNCT__ 3221 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3222 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3223 { 3224 PetscInt m,cstart, cend,j,nnz,i,d; 3225 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3226 const PetscInt *JJ; 3227 PetscScalar *values; 3228 PetscErrorCode ierr; 3229 3230 PetscFunctionBegin; 3231 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3232 3233 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3234 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3235 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3236 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3237 m = B->rmap->n; 3238 cstart = B->cmap->rstart; 3239 cend = B->cmap->rend; 3240 rstart = B->rmap->rstart; 3241 3242 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3243 o_nnz = d_nnz + m; 3244 3245 #if defined(PETSC_USE_DEBUGGING) 3246 for (i=0; i<m; i++) { 3247 nnz = Ii[i+1]- Ii[i]; 3248 JJ = J + Ii[i]; 3249 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3250 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3251 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3252 for (j=1; j<nnz; j++) { 3253 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3254 } 3255 } 3256 #endif 3257 3258 for (i=0; i<m; i++) { 3259 nnz = Ii[i+1]- Ii[i]; 3260 JJ = J + Ii[i]; 3261 nnz_max = PetscMax(nnz_max,nnz); 3262 for (j=0; j<nnz; j++) { 3263 if (*JJ >= cstart) break; 3264 JJ++; 3265 } 3266 d = 0; 3267 for (; j<nnz; j++) { 3268 if (*JJ++ >= cend) break; 3269 d++; 3270 } 3271 d_nnz[i] = d; 3272 o_nnz[i] = nnz - d; 3273 } 3274 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3275 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3276 3277 if (v) values = (PetscScalar*)v; 3278 else { 3279 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3280 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3281 } 3282 3283 for (i=0; i<m; i++) { 3284 ii = i + rstart; 3285 nnz = Ii[i+1]- Ii[i]; 3286 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3287 } 3288 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 3291 if (!v) { 3292 ierr = PetscFree(values);CHKERRQ(ierr); 3293 } 3294 PetscFunctionReturn(0); 3295 } 3296 EXTERN_C_END 3297 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3300 /*@ 3301 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3302 (the default parallel PETSc format). 3303 3304 Collective on MPI_Comm 3305 3306 Input Parameters: 3307 + B - the matrix 3308 . i - the indices into j for the start of each local row (starts with zero) 3309 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3310 - v - optional values in the matrix 3311 3312 Level: developer 3313 3314 Notes: 3315 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3316 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3317 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3318 3319 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3320 3321 The format which is used for the sparse matrix input, is equivalent to a 3322 row-major ordering.. i.e for the following matrix, the input data expected is 3323 as shown: 3324 3325 1 0 0 3326 2 0 3 P0 3327 ------- 3328 4 5 6 P1 3329 3330 Process0 [P0]: rows_owned=[0,1] 3331 i = {0,1,3} [size = nrow+1 = 2+1] 3332 j = {0,0,2} [size = nz = 6] 3333 v = {1,2,3} [size = nz = 6] 3334 3335 Process1 [P1]: rows_owned=[2] 3336 i = {0,3} [size = nrow+1 = 1+1] 3337 j = {0,1,2} [size = nz = 6] 3338 v = {4,5,6} [size = nz = 6] 3339 3340 The column indices for each row MUST be sorted. 3341 3342 .keywords: matrix, aij, compressed row, sparse, parallel 3343 3344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3345 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3346 @*/ 3347 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3348 { 3349 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3350 3351 PetscFunctionBegin; 3352 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3353 if (f) { 3354 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3355 } 3356 PetscFunctionReturn(0); 3357 } 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3361 /*@C 3362 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3363 (the default parallel PETSc format). For good matrix assembly performance 3364 the user should preallocate the matrix storage by setting the parameters 3365 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3366 performance can be increased by more than a factor of 50. 3367 3368 Collective on MPI_Comm 3369 3370 Input Parameters: 3371 + A - the matrix 3372 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3373 (same value is used for all local rows) 3374 . d_nnz - array containing the number of nonzeros in the various rows of the 3375 DIAGONAL portion of the local submatrix (possibly different for each row) 3376 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3377 The size of this array is equal to the number of local rows, i.e 'm'. 3378 You must leave room for the diagonal entry even if it is zero. 3379 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3380 submatrix (same value is used for all local rows). 3381 - o_nnz - array containing the number of nonzeros in the various rows of the 3382 OFF-DIAGONAL portion of the local submatrix (possibly different for 3383 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3384 structure. The size of this array is equal to the number 3385 of local rows, i.e 'm'. 3386 3387 If the *_nnz parameter is given then the *_nz parameter is ignored 3388 3389 The AIJ format (also called the Yale sparse matrix format or 3390 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3391 storage. The stored row and column indices begin with zero. See the users manual for details. 3392 3393 The parallel matrix is partitioned such that the first m0 rows belong to 3394 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3395 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3396 3397 The DIAGONAL portion of the local submatrix of a processor can be defined 3398 as the submatrix which is obtained by extraction the part corresponding 3399 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3400 first row that belongs to the processor, and r2 is the last row belonging 3401 to the this processor. This is a square mxm matrix. The remaining portion 3402 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3403 3404 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3405 3406 You can call MatGetInfo() to get information on how effective the preallocation was; 3407 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3408 You can also run with the option -info and look for messages with the string 3409 malloc in them to see if additional memory allocation was needed. 3410 3411 Example usage: 3412 3413 Consider the following 8x8 matrix with 34 non-zero values, that is 3414 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3415 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3416 as follows: 3417 3418 .vb 3419 1 2 0 | 0 3 0 | 0 4 3420 Proc0 0 5 6 | 7 0 0 | 8 0 3421 9 0 10 | 11 0 0 | 12 0 3422 ------------------------------------- 3423 13 0 14 | 15 16 17 | 0 0 3424 Proc1 0 18 0 | 19 20 21 | 0 0 3425 0 0 0 | 22 23 0 | 24 0 3426 ------------------------------------- 3427 Proc2 25 26 27 | 0 0 28 | 29 0 3428 30 0 0 | 31 32 33 | 0 34 3429 .ve 3430 3431 This can be represented as a collection of submatrices as: 3432 3433 .vb 3434 A B C 3435 D E F 3436 G H I 3437 .ve 3438 3439 Where the submatrices A,B,C are owned by proc0, D,E,F are 3440 owned by proc1, G,H,I are owned by proc2. 3441 3442 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3443 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3444 The 'M','N' parameters are 8,8, and have the same values on all procs. 3445 3446 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3447 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3448 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3449 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3450 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3451 matrix, ans [DF] as another SeqAIJ matrix. 3452 3453 When d_nz, o_nz parameters are specified, d_nz storage elements are 3454 allocated for every row of the local diagonal submatrix, and o_nz 3455 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3456 One way to choose d_nz and o_nz is to use the max nonzerors per local 3457 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3458 In this case, the values of d_nz,o_nz are: 3459 .vb 3460 proc0 : dnz = 2, o_nz = 2 3461 proc1 : dnz = 3, o_nz = 2 3462 proc2 : dnz = 1, o_nz = 4 3463 .ve 3464 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3465 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3466 for proc3. i.e we are using 12+15+10=37 storage locations to store 3467 34 values. 3468 3469 When d_nnz, o_nnz parameters are specified, the storage is specified 3470 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3471 In the above case the values for d_nnz,o_nnz are: 3472 .vb 3473 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3474 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3475 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3476 .ve 3477 Here the space allocated is sum of all the above values i.e 34, and 3478 hence pre-allocation is perfect. 3479 3480 Level: intermediate 3481 3482 .keywords: matrix, aij, compressed row, sparse, parallel 3483 3484 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3485 MPIAIJ, MatGetInfo() 3486 @*/ 3487 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3488 { 3489 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3490 3491 PetscFunctionBegin; 3492 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3493 if (f) { 3494 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3495 } 3496 PetscFunctionReturn(0); 3497 } 3498 3499 #undef __FUNCT__ 3500 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3501 /*@ 3502 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3503 CSR format the local rows. 3504 3505 Collective on MPI_Comm 3506 3507 Input Parameters: 3508 + comm - MPI communicator 3509 . m - number of local rows (Cannot be PETSC_DECIDE) 3510 . n - This value should be the same as the local size used in creating the 3511 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3512 calculated if N is given) For square matrices n is almost always m. 3513 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3514 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3515 . i - row indices 3516 . j - column indices 3517 - a - matrix values 3518 3519 Output Parameter: 3520 . mat - the matrix 3521 3522 Level: intermediate 3523 3524 Notes: 3525 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3526 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3527 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3528 3529 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3530 3531 The format which is used for the sparse matrix input, is equivalent to a 3532 row-major ordering.. i.e for the following matrix, the input data expected is 3533 as shown: 3534 3535 1 0 0 3536 2 0 3 P0 3537 ------- 3538 4 5 6 P1 3539 3540 Process0 [P0]: rows_owned=[0,1] 3541 i = {0,1,3} [size = nrow+1 = 2+1] 3542 j = {0,0,2} [size = nz = 6] 3543 v = {1,2,3} [size = nz = 6] 3544 3545 Process1 [P1]: rows_owned=[2] 3546 i = {0,3} [size = nrow+1 = 1+1] 3547 j = {0,1,2} [size = nz = 6] 3548 v = {4,5,6} [size = nz = 6] 3549 3550 .keywords: matrix, aij, compressed row, sparse, parallel 3551 3552 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3553 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3554 @*/ 3555 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3556 { 3557 PetscErrorCode ierr; 3558 3559 PetscFunctionBegin; 3560 if (i[0]) { 3561 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3562 } 3563 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3564 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3565 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3566 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3567 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3568 PetscFunctionReturn(0); 3569 } 3570 3571 #undef __FUNCT__ 3572 #define __FUNCT__ "MatCreateMPIAIJ" 3573 /*@C 3574 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3575 (the default parallel PETSc format). For good matrix assembly performance 3576 the user should preallocate the matrix storage by setting the parameters 3577 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3578 performance can be increased by more than a factor of 50. 3579 3580 Collective on MPI_Comm 3581 3582 Input Parameters: 3583 + comm - MPI communicator 3584 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3585 This value should be the same as the local size used in creating the 3586 y vector for the matrix-vector product y = Ax. 3587 . n - This value should be the same as the local size used in creating the 3588 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3589 calculated if N is given) For square matrices n is almost always m. 3590 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3591 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3592 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3593 (same value is used for all local rows) 3594 . d_nnz - array containing the number of nonzeros in the various rows of the 3595 DIAGONAL portion of the local submatrix (possibly different for each row) 3596 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3597 The size of this array is equal to the number of local rows, i.e 'm'. 3598 You must leave room for the diagonal entry even if it is zero. 3599 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3600 submatrix (same value is used for all local rows). 3601 - o_nnz - array containing the number of nonzeros in the various rows of the 3602 OFF-DIAGONAL portion of the local submatrix (possibly different for 3603 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3604 structure. The size of this array is equal to the number 3605 of local rows, i.e 'm'. 3606 3607 Output Parameter: 3608 . A - the matrix 3609 3610 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3611 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3612 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3613 3614 Notes: 3615 If the *_nnz parameter is given then the *_nz parameter is ignored 3616 3617 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3618 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3619 storage requirements for this matrix. 3620 3621 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3622 processor than it must be used on all processors that share the object for 3623 that argument. 3624 3625 The user MUST specify either the local or global matrix dimensions 3626 (possibly both). 3627 3628 The parallel matrix is partitioned across processors such that the 3629 first m0 rows belong to process 0, the next m1 rows belong to 3630 process 1, the next m2 rows belong to process 2 etc.. where 3631 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3632 values corresponding to [m x N] submatrix. 3633 3634 The columns are logically partitioned with the n0 columns belonging 3635 to 0th partition, the next n1 columns belonging to the next 3636 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3637 3638 The DIAGONAL portion of the local submatrix on any given processor 3639 is the submatrix corresponding to the rows and columns m,n 3640 corresponding to the given processor. i.e diagonal matrix on 3641 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3642 etc. The remaining portion of the local submatrix [m x (N-n)] 3643 constitute the OFF-DIAGONAL portion. The example below better 3644 illustrates this concept. 3645 3646 For a square global matrix we define each processor's diagonal portion 3647 to be its local rows and the corresponding columns (a square submatrix); 3648 each processor's off-diagonal portion encompasses the remainder of the 3649 local matrix (a rectangular submatrix). 3650 3651 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3652 3653 When calling this routine with a single process communicator, a matrix of 3654 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3655 type of communicator, use the construction mechanism: 3656 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3657 3658 By default, this format uses inodes (identical nodes) when possible. 3659 We search for consecutive rows with the same nonzero structure, thereby 3660 reusing matrix information to achieve increased efficiency. 3661 3662 Options Database Keys: 3663 + -mat_no_inode - Do not use inodes 3664 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3665 - -mat_aij_oneindex - Internally use indexing starting at 1 3666 rather than 0. Note that when calling MatSetValues(), 3667 the user still MUST index entries starting at 0! 3668 3669 3670 Example usage: 3671 3672 Consider the following 8x8 matrix with 34 non-zero values, that is 3673 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3674 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3675 as follows: 3676 3677 .vb 3678 1 2 0 | 0 3 0 | 0 4 3679 Proc0 0 5 6 | 7 0 0 | 8 0 3680 9 0 10 | 11 0 0 | 12 0 3681 ------------------------------------- 3682 13 0 14 | 15 16 17 | 0 0 3683 Proc1 0 18 0 | 19 20 21 | 0 0 3684 0 0 0 | 22 23 0 | 24 0 3685 ------------------------------------- 3686 Proc2 25 26 27 | 0 0 28 | 29 0 3687 30 0 0 | 31 32 33 | 0 34 3688 .ve 3689 3690 This can be represented as a collection of submatrices as: 3691 3692 .vb 3693 A B C 3694 D E F 3695 G H I 3696 .ve 3697 3698 Where the submatrices A,B,C are owned by proc0, D,E,F are 3699 owned by proc1, G,H,I are owned by proc2. 3700 3701 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3702 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3703 The 'M','N' parameters are 8,8, and have the same values on all procs. 3704 3705 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3706 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3707 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3708 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3709 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3710 matrix, ans [DF] as another SeqAIJ matrix. 3711 3712 When d_nz, o_nz parameters are specified, d_nz storage elements are 3713 allocated for every row of the local diagonal submatrix, and o_nz 3714 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3715 One way to choose d_nz and o_nz is to use the max nonzerors per local 3716 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3717 In this case, the values of d_nz,o_nz are: 3718 .vb 3719 proc0 : dnz = 2, o_nz = 2 3720 proc1 : dnz = 3, o_nz = 2 3721 proc2 : dnz = 1, o_nz = 4 3722 .ve 3723 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3724 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3725 for proc3. i.e we are using 12+15+10=37 storage locations to store 3726 34 values. 3727 3728 When d_nnz, o_nnz parameters are specified, the storage is specified 3729 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3730 In the above case the values for d_nnz,o_nnz are: 3731 .vb 3732 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3733 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3734 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3735 .ve 3736 Here the space allocated is sum of all the above values i.e 34, and 3737 hence pre-allocation is perfect. 3738 3739 Level: intermediate 3740 3741 .keywords: matrix, aij, compressed row, sparse, parallel 3742 3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3744 MPIAIJ, MatCreateMPIAIJWithArrays() 3745 @*/ 3746 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3747 { 3748 PetscErrorCode ierr; 3749 PetscMPIInt size; 3750 3751 PetscFunctionBegin; 3752 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3753 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3754 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3755 if (size > 1) { 3756 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3757 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3758 } else { 3759 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3760 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3761 } 3762 PetscFunctionReturn(0); 3763 } 3764 3765 #undef __FUNCT__ 3766 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3767 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3768 { 3769 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3770 3771 PetscFunctionBegin; 3772 *Ad = a->A; 3773 *Ao = a->B; 3774 *colmap = a->garray; 3775 PetscFunctionReturn(0); 3776 } 3777 3778 #undef __FUNCT__ 3779 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3780 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3781 { 3782 PetscErrorCode ierr; 3783 PetscInt i; 3784 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3785 3786 PetscFunctionBegin; 3787 if (coloring->ctype == IS_COLORING_GLOBAL) { 3788 ISColoringValue *allcolors,*colors; 3789 ISColoring ocoloring; 3790 3791 /* set coloring for diagonal portion */ 3792 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3793 3794 /* set coloring for off-diagonal portion */ 3795 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3796 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3797 for (i=0; i<a->B->cmap->n; i++) { 3798 colors[i] = allcolors[a->garray[i]]; 3799 } 3800 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3801 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3802 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3803 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3804 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3805 ISColoringValue *colors; 3806 PetscInt *larray; 3807 ISColoring ocoloring; 3808 3809 /* set coloring for diagonal portion */ 3810 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3811 for (i=0; i<a->A->cmap->n; i++) { 3812 larray[i] = i + A->cmap->rstart; 3813 } 3814 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3815 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3816 for (i=0; i<a->A->cmap->n; i++) { 3817 colors[i] = coloring->colors[larray[i]]; 3818 } 3819 ierr = PetscFree(larray);CHKERRQ(ierr); 3820 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3821 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3822 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3823 3824 /* set coloring for off-diagonal portion */ 3825 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3826 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3827 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3828 for (i=0; i<a->B->cmap->n; i++) { 3829 colors[i] = coloring->colors[larray[i]]; 3830 } 3831 ierr = PetscFree(larray);CHKERRQ(ierr); 3832 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3833 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3834 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3835 } else { 3836 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3837 } 3838 3839 PetscFunctionReturn(0); 3840 } 3841 3842 #if defined(PETSC_HAVE_ADIC) 3843 #undef __FUNCT__ 3844 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3845 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3846 { 3847 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3848 PetscErrorCode ierr; 3849 3850 PetscFunctionBegin; 3851 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3852 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3853 PetscFunctionReturn(0); 3854 } 3855 #endif 3856 3857 #undef __FUNCT__ 3858 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3859 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3860 { 3861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3862 PetscErrorCode ierr; 3863 3864 PetscFunctionBegin; 3865 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3866 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3867 PetscFunctionReturn(0); 3868 } 3869 3870 #undef __FUNCT__ 3871 #define __FUNCT__ "MatMerge" 3872 /*@ 3873 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3874 matrices from each processor 3875 3876 Collective on MPI_Comm 3877 3878 Input Parameters: 3879 + comm - the communicators the parallel matrix will live on 3880 . inmat - the input sequential matrices 3881 . n - number of local columns (or PETSC_DECIDE) 3882 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3883 3884 Output Parameter: 3885 . outmat - the parallel matrix generated 3886 3887 Level: advanced 3888 3889 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3890 3891 @*/ 3892 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3893 { 3894 PetscErrorCode ierr; 3895 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3896 PetscInt *indx; 3897 PetscScalar *values; 3898 3899 PetscFunctionBegin; 3900 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3901 if (scall == MAT_INITIAL_MATRIX){ 3902 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3903 if (n == PETSC_DECIDE){ 3904 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3905 } 3906 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3907 rstart -= m; 3908 3909 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3910 for (i=0;i<m;i++) { 3911 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3912 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3913 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3914 } 3915 /* This routine will ONLY return MPIAIJ type matrix */ 3916 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3917 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3918 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3919 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3920 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3921 3922 } else if (scall == MAT_REUSE_MATRIX){ 3923 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3924 } else { 3925 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3926 } 3927 3928 for (i=0;i<m;i++) { 3929 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3930 Ii = i + rstart; 3931 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3932 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3933 } 3934 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3935 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3936 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3937 3938 PetscFunctionReturn(0); 3939 } 3940 3941 #undef __FUNCT__ 3942 #define __FUNCT__ "MatFileSplit" 3943 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3944 { 3945 PetscErrorCode ierr; 3946 PetscMPIInt rank; 3947 PetscInt m,N,i,rstart,nnz; 3948 size_t len; 3949 const PetscInt *indx; 3950 PetscViewer out; 3951 char *name; 3952 Mat B; 3953 const PetscScalar *values; 3954 3955 PetscFunctionBegin; 3956 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3957 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3958 /* Should this be the type of the diagonal block of A? */ 3959 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3960 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3961 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3962 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3963 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3964 for (i=0;i<m;i++) { 3965 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3966 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3967 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3968 } 3969 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3970 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3971 3972 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3973 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3974 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3975 sprintf(name,"%s.%d",outfile,rank); 3976 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3977 ierr = PetscFree(name); 3978 ierr = MatView(B,out);CHKERRQ(ierr); 3979 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3980 ierr = MatDestroy(B);CHKERRQ(ierr); 3981 PetscFunctionReturn(0); 3982 } 3983 3984 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3985 #undef __FUNCT__ 3986 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3987 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3988 { 3989 PetscErrorCode ierr; 3990 Mat_Merge_SeqsToMPI *merge; 3991 PetscContainer container; 3992 3993 PetscFunctionBegin; 3994 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3995 if (container) { 3996 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4005 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4006 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4007 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 4008 4009 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4010 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4011 } 4012 ierr = PetscFree(merge);CHKERRQ(ierr); 4013 4014 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4015 PetscFunctionReturn(0); 4016 } 4017 4018 #include "../src/mat/utils/freespace.h" 4019 #include "petscbt.h" 4020 4021 #undef __FUNCT__ 4022 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4023 /*@C 4024 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4025 matrices from each processor 4026 4027 Collective on MPI_Comm 4028 4029 Input Parameters: 4030 + comm - the communicators the parallel matrix will live on 4031 . seqmat - the input sequential matrices 4032 . m - number of local rows (or PETSC_DECIDE) 4033 . n - number of local columns (or PETSC_DECIDE) 4034 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4035 4036 Output Parameter: 4037 . mpimat - the parallel matrix generated 4038 4039 Level: advanced 4040 4041 Notes: 4042 The dimensions of the sequential matrix in each processor MUST be the same. 4043 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4044 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4045 @*/ 4046 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4047 { 4048 PetscErrorCode ierr; 4049 MPI_Comm comm=((PetscObject)mpimat)->comm; 4050 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4051 PetscMPIInt size,rank,taga,*len_s; 4052 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4053 PetscInt proc,m; 4054 PetscInt **buf_ri,**buf_rj; 4055 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4056 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4057 MPI_Request *s_waits,*r_waits; 4058 MPI_Status *status; 4059 MatScalar *aa=a->a; 4060 MatScalar **abuf_r,*ba_i; 4061 Mat_Merge_SeqsToMPI *merge; 4062 PetscContainer container; 4063 4064 PetscFunctionBegin; 4065 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4066 4067 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4068 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4069 4070 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4071 if (container) { 4072 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4073 } 4074 bi = merge->bi; 4075 bj = merge->bj; 4076 buf_ri = merge->buf_ri; 4077 buf_rj = merge->buf_rj; 4078 4079 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4080 owners = merge->rowmap.range; 4081 len_s = merge->len_s; 4082 4083 /* send and recv matrix values */ 4084 /*-----------------------------*/ 4085 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4086 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4087 4088 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4089 for (proc=0,k=0; proc<size; proc++){ 4090 if (!len_s[proc]) continue; 4091 i = owners[proc]; 4092 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4093 k++; 4094 } 4095 4096 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4097 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4098 ierr = PetscFree(status);CHKERRQ(ierr); 4099 4100 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4101 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4102 4103 /* insert mat values of mpimat */ 4104 /*----------------------------*/ 4105 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4106 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4107 nextrow = buf_ri_k + merge->nrecv; 4108 nextai = nextrow + merge->nrecv; 4109 4110 for (k=0; k<merge->nrecv; k++){ 4111 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4112 nrows = *(buf_ri_k[k]); 4113 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4114 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4115 } 4116 4117 /* set values of ba */ 4118 m = merge->rowmap.n; 4119 for (i=0; i<m; i++) { 4120 arow = owners[rank] + i; 4121 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4122 bnzi = bi[i+1] - bi[i]; 4123 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4124 4125 /* add local non-zero vals of this proc's seqmat into ba */ 4126 anzi = ai[arow+1] - ai[arow]; 4127 aj = a->j + ai[arow]; 4128 aa = a->a + ai[arow]; 4129 nextaj = 0; 4130 for (j=0; nextaj<anzi; j++){ 4131 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4132 ba_i[j] += aa[nextaj++]; 4133 } 4134 } 4135 4136 /* add received vals into ba */ 4137 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4138 /* i-th row */ 4139 if (i == *nextrow[k]) { 4140 anzi = *(nextai[k]+1) - *nextai[k]; 4141 aj = buf_rj[k] + *(nextai[k]); 4142 aa = abuf_r[k] + *(nextai[k]); 4143 nextaj = 0; 4144 for (j=0; nextaj<anzi; j++){ 4145 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4146 ba_i[j] += aa[nextaj++]; 4147 } 4148 } 4149 nextrow[k]++; nextai[k]++; 4150 } 4151 } 4152 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4153 } 4154 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4155 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4156 4157 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4158 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4159 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4160 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4161 PetscFunctionReturn(0); 4162 } 4163 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4166 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4167 { 4168 PetscErrorCode ierr; 4169 Mat B_mpi; 4170 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4171 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4172 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4173 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4174 PetscInt len,proc,*dnz,*onz; 4175 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4176 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4177 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4178 MPI_Status *status; 4179 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4180 PetscBT lnkbt; 4181 Mat_Merge_SeqsToMPI *merge; 4182 PetscContainer container; 4183 4184 PetscFunctionBegin; 4185 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4186 4187 /* make sure it is a PETSc comm */ 4188 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4189 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4190 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4191 4192 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4193 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4194 4195 /* determine row ownership */ 4196 /*---------------------------------------------------------*/ 4197 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4198 merge->rowmap.n = m; 4199 merge->rowmap.N = M; 4200 merge->rowmap.bs = 1; 4201 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4202 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4203 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4204 4205 m = merge->rowmap.n; 4206 M = merge->rowmap.N; 4207 owners = merge->rowmap.range; 4208 4209 /* determine the number of messages to send, their lengths */ 4210 /*---------------------------------------------------------*/ 4211 len_s = merge->len_s; 4212 4213 len = 0; /* length of buf_si[] */ 4214 merge->nsend = 0; 4215 for (proc=0; proc<size; proc++){ 4216 len_si[proc] = 0; 4217 if (proc == rank){ 4218 len_s[proc] = 0; 4219 } else { 4220 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4221 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4222 } 4223 if (len_s[proc]) { 4224 merge->nsend++; 4225 nrows = 0; 4226 for (i=owners[proc]; i<owners[proc+1]; i++){ 4227 if (ai[i+1] > ai[i]) nrows++; 4228 } 4229 len_si[proc] = 2*(nrows+1); 4230 len += len_si[proc]; 4231 } 4232 } 4233 4234 /* determine the number and length of messages to receive for ij-structure */ 4235 /*-------------------------------------------------------------------------*/ 4236 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4237 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4238 4239 /* post the Irecv of j-structure */ 4240 /*-------------------------------*/ 4241 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4242 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4243 4244 /* post the Isend of j-structure */ 4245 /*--------------------------------*/ 4246 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4247 sj_waits = si_waits + merge->nsend; 4248 4249 for (proc=0, k=0; proc<size; proc++){ 4250 if (!len_s[proc]) continue; 4251 i = owners[proc]; 4252 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4253 k++; 4254 } 4255 4256 /* receives and sends of j-structure are complete */ 4257 /*------------------------------------------------*/ 4258 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4259 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4260 4261 /* send and recv i-structure */ 4262 /*---------------------------*/ 4263 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4264 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4265 4266 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4267 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4268 for (proc=0,k=0; proc<size; proc++){ 4269 if (!len_s[proc]) continue; 4270 /* form outgoing message for i-structure: 4271 buf_si[0]: nrows to be sent 4272 [1:nrows]: row index (global) 4273 [nrows+1:2*nrows+1]: i-structure index 4274 */ 4275 /*-------------------------------------------*/ 4276 nrows = len_si[proc]/2 - 1; 4277 buf_si_i = buf_si + nrows+1; 4278 buf_si[0] = nrows; 4279 buf_si_i[0] = 0; 4280 nrows = 0; 4281 for (i=owners[proc]; i<owners[proc+1]; i++){ 4282 anzi = ai[i+1] - ai[i]; 4283 if (anzi) { 4284 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4285 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4286 nrows++; 4287 } 4288 } 4289 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4290 k++; 4291 buf_si += len_si[proc]; 4292 } 4293 4294 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4295 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4296 4297 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4298 for (i=0; i<merge->nrecv; i++){ 4299 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4300 } 4301 4302 ierr = PetscFree(len_si);CHKERRQ(ierr); 4303 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4304 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4305 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4306 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4307 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4308 ierr = PetscFree(status);CHKERRQ(ierr); 4309 4310 /* compute a local seq matrix in each processor */ 4311 /*----------------------------------------------*/ 4312 /* allocate bi array and free space for accumulating nonzero column info */ 4313 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4314 bi[0] = 0; 4315 4316 /* create and initialize a linked list */ 4317 nlnk = N+1; 4318 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4319 4320 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4321 len = 0; 4322 len = ai[owners[rank+1]] - ai[owners[rank]]; 4323 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4324 current_space = free_space; 4325 4326 /* determine symbolic info for each local row */ 4327 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4328 nextrow = buf_ri_k + merge->nrecv; 4329 nextai = nextrow + merge->nrecv; 4330 for (k=0; k<merge->nrecv; k++){ 4331 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4332 nrows = *buf_ri_k[k]; 4333 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4334 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4335 } 4336 4337 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4338 len = 0; 4339 for (i=0;i<m;i++) { 4340 bnzi = 0; 4341 /* add local non-zero cols of this proc's seqmat into lnk */ 4342 arow = owners[rank] + i; 4343 anzi = ai[arow+1] - ai[arow]; 4344 aj = a->j + ai[arow]; 4345 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4346 bnzi += nlnk; 4347 /* add received col data into lnk */ 4348 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4349 if (i == *nextrow[k]) { /* i-th row */ 4350 anzi = *(nextai[k]+1) - *nextai[k]; 4351 aj = buf_rj[k] + *nextai[k]; 4352 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4353 bnzi += nlnk; 4354 nextrow[k]++; nextai[k]++; 4355 } 4356 } 4357 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4358 4359 /* if free space is not available, make more free space */ 4360 if (current_space->local_remaining<bnzi) { 4361 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4362 nspacedouble++; 4363 } 4364 /* copy data into free space, then initialize lnk */ 4365 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4366 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4367 4368 current_space->array += bnzi; 4369 current_space->local_used += bnzi; 4370 current_space->local_remaining -= bnzi; 4371 4372 bi[i+1] = bi[i] + bnzi; 4373 } 4374 4375 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4376 4377 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4378 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4379 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4380 4381 /* create symbolic parallel matrix B_mpi */ 4382 /*---------------------------------------*/ 4383 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4384 if (n==PETSC_DECIDE) { 4385 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4386 } else { 4387 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4388 } 4389 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4390 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4391 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4392 4393 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4394 B_mpi->assembled = PETSC_FALSE; 4395 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4396 merge->bi = bi; 4397 merge->bj = bj; 4398 merge->buf_ri = buf_ri; 4399 merge->buf_rj = buf_rj; 4400 merge->coi = PETSC_NULL; 4401 merge->coj = PETSC_NULL; 4402 merge->owners_co = PETSC_NULL; 4403 4404 /* attach the supporting struct to B_mpi for reuse */ 4405 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4406 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4407 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4408 *mpimat = B_mpi; 4409 4410 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4411 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4412 PetscFunctionReturn(0); 4413 } 4414 4415 #undef __FUNCT__ 4416 #define __FUNCT__ "MatMerge_SeqsToMPI" 4417 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4418 { 4419 PetscErrorCode ierr; 4420 4421 PetscFunctionBegin; 4422 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4423 if (scall == MAT_INITIAL_MATRIX){ 4424 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4425 } 4426 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4427 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #undef __FUNCT__ 4432 #define __FUNCT__ "MatGetLocalMat" 4433 /*@ 4434 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4435 4436 Not Collective 4437 4438 Input Parameters: 4439 + A - the matrix 4440 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4441 4442 Output Parameter: 4443 . A_loc - the local sequential matrix generated 4444 4445 Level: developer 4446 4447 @*/ 4448 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4449 { 4450 PetscErrorCode ierr; 4451 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4452 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4453 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4454 MatScalar *aa=a->a,*ba=b->a,*cam; 4455 PetscScalar *ca; 4456 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4457 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4461 if (scall == MAT_INITIAL_MATRIX){ 4462 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4463 ci[0] = 0; 4464 for (i=0; i<am; i++){ 4465 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4466 } 4467 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4468 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4469 k = 0; 4470 for (i=0; i<am; i++) { 4471 ncols_o = bi[i+1] - bi[i]; 4472 ncols_d = ai[i+1] - ai[i]; 4473 /* off-diagonal portion of A */ 4474 for (jo=0; jo<ncols_o; jo++) { 4475 col = cmap[*bj]; 4476 if (col >= cstart) break; 4477 cj[k] = col; bj++; 4478 ca[k++] = *ba++; 4479 } 4480 /* diagonal portion of A */ 4481 for (j=0; j<ncols_d; j++) { 4482 cj[k] = cstart + *aj++; 4483 ca[k++] = *aa++; 4484 } 4485 /* off-diagonal portion of A */ 4486 for (j=jo; j<ncols_o; j++) { 4487 cj[k] = cmap[*bj++]; 4488 ca[k++] = *ba++; 4489 } 4490 } 4491 /* put together the new matrix */ 4492 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4493 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4494 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4495 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4496 mat->free_a = PETSC_TRUE; 4497 mat->free_ij = PETSC_TRUE; 4498 mat->nonew = 0; 4499 } else if (scall == MAT_REUSE_MATRIX){ 4500 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4501 ci = mat->i; cj = mat->j; cam = mat->a; 4502 for (i=0; i<am; i++) { 4503 /* off-diagonal portion of A */ 4504 ncols_o = bi[i+1] - bi[i]; 4505 for (jo=0; jo<ncols_o; jo++) { 4506 col = cmap[*bj]; 4507 if (col >= cstart) break; 4508 *cam++ = *ba++; bj++; 4509 } 4510 /* diagonal portion of A */ 4511 ncols_d = ai[i+1] - ai[i]; 4512 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4513 /* off-diagonal portion of A */ 4514 for (j=jo; j<ncols_o; j++) { 4515 *cam++ = *ba++; bj++; 4516 } 4517 } 4518 } else { 4519 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4520 } 4521 4522 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4523 PetscFunctionReturn(0); 4524 } 4525 4526 #undef __FUNCT__ 4527 #define __FUNCT__ "MatGetLocalMatCondensed" 4528 /*@C 4529 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4530 4531 Not Collective 4532 4533 Input Parameters: 4534 + A - the matrix 4535 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4536 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4537 4538 Output Parameter: 4539 . A_loc - the local sequential matrix generated 4540 4541 Level: developer 4542 4543 @*/ 4544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4545 { 4546 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4547 PetscErrorCode ierr; 4548 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4549 IS isrowa,iscola; 4550 Mat *aloc; 4551 4552 PetscFunctionBegin; 4553 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4554 if (!row){ 4555 start = A->rmap->rstart; end = A->rmap->rend; 4556 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4557 } else { 4558 isrowa = *row; 4559 } 4560 if (!col){ 4561 start = A->cmap->rstart; 4562 cmap = a->garray; 4563 nzA = a->A->cmap->n; 4564 nzB = a->B->cmap->n; 4565 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4566 ncols = 0; 4567 for (i=0; i<nzB; i++) { 4568 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4569 else break; 4570 } 4571 imark = i; 4572 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4573 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4574 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4575 ierr = PetscFree(idx);CHKERRQ(ierr); 4576 } else { 4577 iscola = *col; 4578 } 4579 if (scall != MAT_INITIAL_MATRIX){ 4580 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4581 aloc[0] = *A_loc; 4582 } 4583 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4584 *A_loc = aloc[0]; 4585 ierr = PetscFree(aloc);CHKERRQ(ierr); 4586 if (!row){ 4587 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4588 } 4589 if (!col){ 4590 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4591 } 4592 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4593 PetscFunctionReturn(0); 4594 } 4595 4596 #undef __FUNCT__ 4597 #define __FUNCT__ "MatGetBrowsOfAcols" 4598 /*@C 4599 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4600 4601 Collective on Mat 4602 4603 Input Parameters: 4604 + A,B - the matrices in mpiaij format 4605 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4606 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4607 4608 Output Parameter: 4609 + rowb, colb - index sets of rows and columns of B to extract 4610 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4611 - B_seq - the sequential matrix generated 4612 4613 Level: developer 4614 4615 @*/ 4616 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4617 { 4618 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4619 PetscErrorCode ierr; 4620 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4621 IS isrowb,iscolb; 4622 Mat *bseq; 4623 4624 PetscFunctionBegin; 4625 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4626 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4627 } 4628 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4629 4630 if (scall == MAT_INITIAL_MATRIX){ 4631 start = A->cmap->rstart; 4632 cmap = a->garray; 4633 nzA = a->A->cmap->n; 4634 nzB = a->B->cmap->n; 4635 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4636 ncols = 0; 4637 for (i=0; i<nzB; i++) { /* row < local row index */ 4638 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4639 else break; 4640 } 4641 imark = i; 4642 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4643 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4644 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4645 ierr = PetscFree(idx);CHKERRQ(ierr); 4646 *brstart = imark; 4647 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4648 } else { 4649 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4650 isrowb = *rowb; iscolb = *colb; 4651 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4652 bseq[0] = *B_seq; 4653 } 4654 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4655 *B_seq = bseq[0]; 4656 ierr = PetscFree(bseq);CHKERRQ(ierr); 4657 if (!rowb){ 4658 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4659 } else { 4660 *rowb = isrowb; 4661 } 4662 if (!colb){ 4663 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4664 } else { 4665 *colb = iscolb; 4666 } 4667 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4668 PetscFunctionReturn(0); 4669 } 4670 4671 #undef __FUNCT__ 4672 #define __FUNCT__ "MatGetBrowsOfAoCols" 4673 /*@C 4674 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4675 of the OFF-DIAGONAL portion of local A 4676 4677 Collective on Mat 4678 4679 Input Parameters: 4680 + A,B - the matrices in mpiaij format 4681 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4682 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4683 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4684 4685 Output Parameter: 4686 + B_oth - the sequential matrix generated 4687 4688 Level: developer 4689 4690 @*/ 4691 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4692 { 4693 VecScatter_MPI_General *gen_to,*gen_from; 4694 PetscErrorCode ierr; 4695 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4696 Mat_SeqAIJ *b_oth; 4697 VecScatter ctx=a->Mvctx; 4698 MPI_Comm comm=((PetscObject)ctx)->comm; 4699 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4700 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4701 PetscScalar *rvalues,*svalues; 4702 MatScalar *b_otha,*bufa,*bufA; 4703 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4704 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4705 MPI_Status *sstatus,rstatus; 4706 PetscMPIInt jj; 4707 PetscInt *cols,sbs,rbs; 4708 PetscScalar *vals; 4709 4710 PetscFunctionBegin; 4711 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4712 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4713 } 4714 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4715 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4716 4717 gen_to = (VecScatter_MPI_General*)ctx->todata; 4718 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4719 rvalues = gen_from->values; /* holds the length of receiving row */ 4720 svalues = gen_to->values; /* holds the length of sending row */ 4721 nrecvs = gen_from->n; 4722 nsends = gen_to->n; 4723 4724 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4725 srow = gen_to->indices; /* local row index to be sent */ 4726 sstarts = gen_to->starts; 4727 sprocs = gen_to->procs; 4728 sstatus = gen_to->sstatus; 4729 sbs = gen_to->bs; 4730 rstarts = gen_from->starts; 4731 rprocs = gen_from->procs; 4732 rbs = gen_from->bs; 4733 4734 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4735 if (scall == MAT_INITIAL_MATRIX){ 4736 /* i-array */ 4737 /*---------*/ 4738 /* post receives */ 4739 for (i=0; i<nrecvs; i++){ 4740 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4741 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4742 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4743 } 4744 4745 /* pack the outgoing message */ 4746 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4747 rstartsj = sstartsj + nsends +1; 4748 sstartsj[0] = 0; rstartsj[0] = 0; 4749 len = 0; /* total length of j or a array to be sent */ 4750 k = 0; 4751 for (i=0; i<nsends; i++){ 4752 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4753 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4754 for (j=0; j<nrows; j++) { 4755 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4756 for (l=0; l<sbs; l++){ 4757 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4758 rowlen[j*sbs+l] = ncols; 4759 len += ncols; 4760 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4761 } 4762 k++; 4763 } 4764 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4765 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4766 } 4767 /* recvs and sends of i-array are completed */ 4768 i = nrecvs; 4769 while (i--) { 4770 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4771 } 4772 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4773 4774 /* allocate buffers for sending j and a arrays */ 4775 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4776 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4777 4778 /* create i-array of B_oth */ 4779 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4780 b_othi[0] = 0; 4781 len = 0; /* total length of j or a array to be received */ 4782 k = 0; 4783 for (i=0; i<nrecvs; i++){ 4784 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4785 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4786 for (j=0; j<nrows; j++) { 4787 b_othi[k+1] = b_othi[k] + rowlen[j]; 4788 len += rowlen[j]; k++; 4789 } 4790 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4791 } 4792 4793 /* allocate space for j and a arrrays of B_oth */ 4794 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4795 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4796 4797 /* j-array */ 4798 /*---------*/ 4799 /* post receives of j-array */ 4800 for (i=0; i<nrecvs; i++){ 4801 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4802 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4803 } 4804 4805 /* pack the outgoing message j-array */ 4806 k = 0; 4807 for (i=0; i<nsends; i++){ 4808 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4809 bufJ = bufj+sstartsj[i]; 4810 for (j=0; j<nrows; j++) { 4811 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4812 for (ll=0; ll<sbs; ll++){ 4813 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4814 for (l=0; l<ncols; l++){ 4815 *bufJ++ = cols[l]; 4816 } 4817 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4818 } 4819 } 4820 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4821 } 4822 4823 /* recvs and sends of j-array are completed */ 4824 i = nrecvs; 4825 while (i--) { 4826 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4827 } 4828 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4829 } else if (scall == MAT_REUSE_MATRIX){ 4830 sstartsj = *startsj; 4831 rstartsj = sstartsj + nsends +1; 4832 bufa = *bufa_ptr; 4833 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4834 b_otha = b_oth->a; 4835 } else { 4836 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4837 } 4838 4839 /* a-array */ 4840 /*---------*/ 4841 /* post receives of a-array */ 4842 for (i=0; i<nrecvs; i++){ 4843 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4844 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4845 } 4846 4847 /* pack the outgoing message a-array */ 4848 k = 0; 4849 for (i=0; i<nsends; i++){ 4850 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4851 bufA = bufa+sstartsj[i]; 4852 for (j=0; j<nrows; j++) { 4853 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4854 for (ll=0; ll<sbs; ll++){ 4855 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4856 for (l=0; l<ncols; l++){ 4857 *bufA++ = vals[l]; 4858 } 4859 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4860 } 4861 } 4862 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4863 } 4864 /* recvs and sends of a-array are completed */ 4865 i = nrecvs; 4866 while (i--) { 4867 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4868 } 4869 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4870 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4871 4872 if (scall == MAT_INITIAL_MATRIX){ 4873 /* put together the new matrix */ 4874 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4875 4876 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4877 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4878 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4879 b_oth->free_a = PETSC_TRUE; 4880 b_oth->free_ij = PETSC_TRUE; 4881 b_oth->nonew = 0; 4882 4883 ierr = PetscFree(bufj);CHKERRQ(ierr); 4884 if (!startsj || !bufa_ptr){ 4885 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4886 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4887 } else { 4888 *startsj = sstartsj; 4889 *bufa_ptr = bufa; 4890 } 4891 } 4892 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4893 PetscFunctionReturn(0); 4894 } 4895 4896 #undef __FUNCT__ 4897 #define __FUNCT__ "MatGetCommunicationStructs" 4898 /*@C 4899 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4900 4901 Not Collective 4902 4903 Input Parameters: 4904 . A - The matrix in mpiaij format 4905 4906 Output Parameter: 4907 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4908 . colmap - A map from global column index to local index into lvec 4909 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4910 4911 Level: developer 4912 4913 @*/ 4914 #if defined (PETSC_USE_CTABLE) 4915 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4916 #else 4917 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4918 #endif 4919 { 4920 Mat_MPIAIJ *a; 4921 4922 PetscFunctionBegin; 4923 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4924 PetscValidPointer(lvec, 2) 4925 PetscValidPointer(colmap, 3) 4926 PetscValidPointer(multScatter, 4) 4927 a = (Mat_MPIAIJ *) A->data; 4928 if (lvec) *lvec = a->lvec; 4929 if (colmap) *colmap = a->colmap; 4930 if (multScatter) *multScatter = a->Mvctx; 4931 PetscFunctionReturn(0); 4932 } 4933 4934 EXTERN_C_BEGIN 4935 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4936 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4937 EXTERN_C_END 4938 4939 #include "../src/mat/impls/dense/mpi/mpidense.h" 4940 4941 #undef __FUNCT__ 4942 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4943 /* 4944 Computes (B'*A')' since computing B*A directly is untenable 4945 4946 n p p 4947 ( ) ( ) ( ) 4948 m ( A ) * n ( B ) = m ( C ) 4949 ( ) ( ) ( ) 4950 4951 */ 4952 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4953 { 4954 PetscErrorCode ierr; 4955 Mat At,Bt,Ct; 4956 4957 PetscFunctionBegin; 4958 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4959 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4960 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4961 ierr = MatDestroy(At);CHKERRQ(ierr); 4962 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4963 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4964 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4965 PetscFunctionReturn(0); 4966 } 4967 4968 #undef __FUNCT__ 4969 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4970 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4971 { 4972 PetscErrorCode ierr; 4973 PetscInt m=A->rmap->n,n=B->cmap->n; 4974 Mat Cmat; 4975 4976 PetscFunctionBegin; 4977 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4978 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4979 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4980 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4981 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4982 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4983 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4984 *C = Cmat; 4985 PetscFunctionReturn(0); 4986 } 4987 4988 /* ----------------------------------------------------------------*/ 4989 #undef __FUNCT__ 4990 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4991 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4992 { 4993 PetscErrorCode ierr; 4994 4995 PetscFunctionBegin; 4996 if (scall == MAT_INITIAL_MATRIX){ 4997 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4998 } 4999 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5000 PetscFunctionReturn(0); 5001 } 5002 5003 EXTERN_C_BEGIN 5004 #if defined(PETSC_HAVE_MUMPS) 5005 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5006 #endif 5007 #if defined(PETSC_HAVE_PASTIX) 5008 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5009 #endif 5010 #if defined(PETSC_HAVE_SUPERLU_DIST) 5011 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5012 #endif 5013 #if defined(PETSC_HAVE_SPOOLES) 5014 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5015 #endif 5016 EXTERN_C_END 5017 5018 /*MC 5019 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5020 5021 Options Database Keys: 5022 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5023 5024 Level: beginner 5025 5026 .seealso: MatCreateMPIAIJ() 5027 M*/ 5028 5029 EXTERN_C_BEGIN 5030 #undef __FUNCT__ 5031 #define __FUNCT__ "MatCreate_MPIAIJ" 5032 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5033 { 5034 Mat_MPIAIJ *b; 5035 PetscErrorCode ierr; 5036 PetscMPIInt size; 5037 5038 PetscFunctionBegin; 5039 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5040 5041 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5042 B->data = (void*)b; 5043 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5044 B->rmap->bs = 1; 5045 B->assembled = PETSC_FALSE; 5046 B->mapping = 0; 5047 5048 B->insertmode = NOT_SET_VALUES; 5049 b->size = size; 5050 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5051 5052 /* build cache for off array entries formed */ 5053 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5054 b->donotstash = PETSC_FALSE; 5055 b->colmap = 0; 5056 b->garray = 0; 5057 b->roworiented = PETSC_TRUE; 5058 5059 /* stuff used for matrix vector multiply */ 5060 b->lvec = PETSC_NULL; 5061 b->Mvctx = PETSC_NULL; 5062 5063 /* stuff for MatGetRow() */ 5064 b->rowindices = 0; 5065 b->rowvalues = 0; 5066 b->getrowactive = PETSC_FALSE; 5067 5068 #if defined(PETSC_HAVE_SPOOLES) 5069 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5070 "MatGetFactor_mpiaij_spooles", 5071 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5072 #endif 5073 #if defined(PETSC_HAVE_MUMPS) 5074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5075 "MatGetFactor_mpiaij_mumps", 5076 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5077 #endif 5078 #if defined(PETSC_HAVE_PASTIX) 5079 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5080 "MatGetFactor_mpiaij_pastix", 5081 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5082 #endif 5083 #if defined(PETSC_HAVE_SUPERLU_DIST) 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5085 "MatGetFactor_mpiaij_superlu_dist", 5086 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5087 #endif 5088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5089 "MatStoreValues_MPIAIJ", 5090 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5092 "MatRetrieveValues_MPIAIJ", 5093 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5095 "MatGetDiagonalBlock_MPIAIJ", 5096 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5098 "MatIsTranspose_MPIAIJ", 5099 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5101 "MatMPIAIJSetPreallocation_MPIAIJ", 5102 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5104 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5105 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5107 "MatDiagonalScaleLocal_MPIAIJ", 5108 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5110 "MatConvert_MPIAIJ_MPICSRPERM", 5111 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5113 "MatConvert_MPIAIJ_MPICRL", 5114 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5116 "MatMatMult_MPIDense_MPIAIJ", 5117 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5119 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5120 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5121 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5122 "MatMatMultNumeric_MPIDense_MPIAIJ", 5123 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5124 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5125 PetscFunctionReturn(0); 5126 } 5127 EXTERN_C_END 5128 5129 #undef __FUNCT__ 5130 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5131 /*@ 5132 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5133 and "off-diagonal" part of the matrix in CSR format. 5134 5135 Collective on MPI_Comm 5136 5137 Input Parameters: 5138 + comm - MPI communicator 5139 . m - number of local rows (Cannot be PETSC_DECIDE) 5140 . n - This value should be the same as the local size used in creating the 5141 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5142 calculated if N is given) For square matrices n is almost always m. 5143 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5144 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5145 . i - row indices for "diagonal" portion of matrix 5146 . j - column indices 5147 . a - matrix values 5148 . oi - row indices for "off-diagonal" portion of matrix 5149 . oj - column indices 5150 - oa - matrix values 5151 5152 Output Parameter: 5153 . mat - the matrix 5154 5155 Level: advanced 5156 5157 Notes: 5158 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5159 5160 The i and j indices are 0 based 5161 5162 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5163 5164 This sets local rows and cannot be used to set off-processor values. 5165 5166 You cannot later use MatSetValues() to change values in this matrix. 5167 5168 .keywords: matrix, aij, compressed row, sparse, parallel 5169 5170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5171 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5172 @*/ 5173 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5174 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5175 { 5176 PetscErrorCode ierr; 5177 Mat_MPIAIJ *maij; 5178 5179 PetscFunctionBegin; 5180 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5181 if (i[0]) { 5182 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5183 } 5184 if (oi[0]) { 5185 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5186 } 5187 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5188 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5189 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5190 maij = (Mat_MPIAIJ*) (*mat)->data; 5191 maij->donotstash = PETSC_TRUE; 5192 (*mat)->preallocated = PETSC_TRUE; 5193 5194 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5195 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5196 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5197 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5198 5199 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5200 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5201 5202 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5203 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5204 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5206 5207 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5208 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5209 PetscFunctionReturn(0); 5210 } 5211 5212 /* 5213 Special version for direct calls from Fortran 5214 */ 5215 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5216 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5217 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5218 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5219 #endif 5220 5221 /* Change these macros so can be used in void function */ 5222 #undef CHKERRQ 5223 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5224 #undef SETERRQ2 5225 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5226 #undef SETERRQ 5227 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5228 5229 EXTERN_C_BEGIN 5230 #undef __FUNCT__ 5231 #define __FUNCT__ "matsetvaluesmpiaij_" 5232 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5233 { 5234 Mat mat = *mmat; 5235 PetscInt m = *mm, n = *mn; 5236 InsertMode addv = *maddv; 5237 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5238 PetscScalar value; 5239 PetscErrorCode ierr; 5240 5241 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5242 if (mat->insertmode == NOT_SET_VALUES) { 5243 mat->insertmode = addv; 5244 } 5245 #if defined(PETSC_USE_DEBUG) 5246 else if (mat->insertmode != addv) { 5247 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5248 } 5249 #endif 5250 { 5251 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5252 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5253 PetscTruth roworiented = aij->roworiented; 5254 5255 /* Some Variables required in the macro */ 5256 Mat A = aij->A; 5257 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5258 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5259 MatScalar *aa = a->a; 5260 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5261 Mat B = aij->B; 5262 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5263 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5264 MatScalar *ba = b->a; 5265 5266 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5267 PetscInt nonew = a->nonew; 5268 MatScalar *ap1,*ap2; 5269 5270 PetscFunctionBegin; 5271 for (i=0; i<m; i++) { 5272 if (im[i] < 0) continue; 5273 #if defined(PETSC_USE_DEBUG) 5274 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5275 #endif 5276 if (im[i] >= rstart && im[i] < rend) { 5277 row = im[i] - rstart; 5278 lastcol1 = -1; 5279 rp1 = aj + ai[row]; 5280 ap1 = aa + ai[row]; 5281 rmax1 = aimax[row]; 5282 nrow1 = ailen[row]; 5283 low1 = 0; 5284 high1 = nrow1; 5285 lastcol2 = -1; 5286 rp2 = bj + bi[row]; 5287 ap2 = ba + bi[row]; 5288 rmax2 = bimax[row]; 5289 nrow2 = bilen[row]; 5290 low2 = 0; 5291 high2 = nrow2; 5292 5293 for (j=0; j<n; j++) { 5294 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5295 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5296 if (in[j] >= cstart && in[j] < cend){ 5297 col = in[j] - cstart; 5298 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5299 } else if (in[j] < 0) continue; 5300 #if defined(PETSC_USE_DEBUG) 5301 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5302 #endif 5303 else { 5304 if (mat->was_assembled) { 5305 if (!aij->colmap) { 5306 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5307 } 5308 #if defined (PETSC_USE_CTABLE) 5309 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5310 col--; 5311 #else 5312 col = aij->colmap[in[j]] - 1; 5313 #endif 5314 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5315 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5316 col = in[j]; 5317 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5318 B = aij->B; 5319 b = (Mat_SeqAIJ*)B->data; 5320 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5321 rp2 = bj + bi[row]; 5322 ap2 = ba + bi[row]; 5323 rmax2 = bimax[row]; 5324 nrow2 = bilen[row]; 5325 low2 = 0; 5326 high2 = nrow2; 5327 bm = aij->B->rmap->n; 5328 ba = b->a; 5329 } 5330 } else col = in[j]; 5331 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5332 } 5333 } 5334 } else { 5335 if (!aij->donotstash) { 5336 if (roworiented) { 5337 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5338 } else { 5339 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5340 } 5341 } 5342 } 5343 }} 5344 PetscFunctionReturnVoid(); 5345 } 5346 EXTERN_C_END 5347 5348