1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatDistribute_MPIAIJ" 7 /* 8 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10 11 Only for square matrices 12 */ 13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14 { 15 PetscMPIInt rank,size; 16 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17 PetscErrorCode ierr; 18 Mat mat; 19 Mat_SeqAIJ *gmata; 20 PetscMPIInt tag; 21 MPI_Status status; 22 PetscTruth aij; 23 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24 25 PetscFunctionBegin; 26 CHKMEMQ; 27 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29 if (!rank) { 30 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32 } 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40 rowners[0] = 0; 41 for (i=2; i<=size; i++) { 42 rowners[i] += rowners[i-1]; 43 } 44 rstart = rowners[rank]; 45 rend = rowners[rank+1]; 46 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47 if (!rank) { 48 gmata = (Mat_SeqAIJ*) gmat->data; 49 /* send row lengths to all processors */ 50 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51 for (i=1; i<size; i++) { 52 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53 } 54 /* determine number diagonal and off-diagonal counts */ 55 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58 jj = 0; 59 for (i=0; i<m; i++) { 60 for (j=0; j<dlens[i]; j++) { 61 if (gmata->j[jj] < rstart) ld[i]++; 62 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63 jj++; 64 } 65 } 66 /* send column indices to other processes */ 67 for (i=1; i<size; i++) { 68 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 } 72 73 /* send numerical values to other processes */ 74 for (i=1; i<size; i++) { 75 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77 } 78 gmataa = gmata->a; 79 gmataj = gmata->j; 80 81 } else { 82 /* receive row lengths */ 83 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84 /* receive column indices */ 85 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88 /* determine number diagonal and off-diagonal counts */ 89 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if (mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the 
local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash-table cost; without it, it is not scalable (each process
    holds an order-N integer array) but the lookup is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
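  /*
     The incoming array v must hold the complete local row in increasing global column
     order: first the off-diagonal entries whose global column lies left of this
     process's diagonal block, then the diagonal-block entries, then the remaining
     off-diagonal entries; the three pieces are copied into B and A below.
  */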
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 if (v) PetscValidScalarPointer(v,6); 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = 
MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of the submatrix B cannot change, then we know that
     no processor disassembled and we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar
diag) 564 { 565 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 568 PetscInt i,*owners = A->rmap->range; 569 PetscInt *nprocs,j,idx,nsends,row; 570 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 571 PetscInt *rvalues,count,base,slen,*source; 572 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 573 MPI_Comm comm = ((PetscObject)A)->comm; 574 MPI_Request *send_waits,*recv_waits; 575 MPI_Status recv_status,*send_status; 576 #if defined(PETSC_DEBUG) 577 PetscTruth found = PETSC_FALSE; 578 #endif 579 580 PetscFunctionBegin; 581 /* first count number of contributors to each processor */ 582 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 583 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 584 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 585 j = 0; 586 for (i=0; i<N; i++) { 587 if (lastidx > (idx = rows[i])) j = 0; 588 lastidx = idx; 589 for (; j<size; j++) { 590 if (idx >= owners[j] && idx < owners[j+1]) { 591 nprocs[2*j]++; 592 nprocs[2*j+1] = 1; 593 owner[i] = j; 594 #if defined(PETSC_DEBUG) 595 found = PETSC_TRUE; 596 #endif 597 break; 598 } 599 } 600 #if defined(PETSC_DEBUG) 601 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 602 found = PETSC_FALSE; 603 #endif 604 } 605 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 606 607 /* inform other processors of number of messages and max length*/ 608 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 609 610 /* post receives: */ 611 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 612 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 613 for (i=0; i<nrecvs; i++) { 614 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 615 } 616 617 /* do sends: 618 1) starts[i] gives the starting index in svalues for stuff going to 619 the ith processor 620 */ 621 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 622 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 623 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 624 starts[0] = 0; 625 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 626 for (i=0; i<N; i++) { 627 svalues[starts[owner[i]]++] = rows[i]; 628 } 629 630 starts[0] = 0; 631 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 632 count = 0; 633 for (i=0; i<size; i++) { 634 if (nprocs[2*i+1]) { 635 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 636 } 637 } 638 ierr = PetscFree(starts);CHKERRQ(ierr); 639 640 base = owners[rank]; 641 642 /* wait on receives */ 643 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 644 source = lens + nrecvs; 645 count = nrecvs; slen = 0; 646 while (count) { 647 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 648 /* unpack receives into our local space */ 649 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 650 source[imdex] = recv_status.MPI_SOURCE; 651 lens[imdex] = n; 652 slen += n; 653 count--; 654 } 655 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 656 657 /* move the data into the send scatter */ 658 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 659 count = 0; 660 for (i=0; i<nrecvs; i++) { 661 values = rvalues + i*nmax; 662 for (j=0; j<lens[i]; j++) { 
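      /* convert each received global row number into a local row index; base is the first global row owned by this process */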
663 lrows[count++] = values[j] - base; 664 } 665 } 666 ierr = PetscFree(rvalues);CHKERRQ(ierr); 667 ierr = PetscFree(lens);CHKERRQ(ierr); 668 ierr = PetscFree(owner);CHKERRQ(ierr); 669 ierr = PetscFree(nprocs);CHKERRQ(ierr); 670 671 /* actually zap the local rows */ 672 /* 673 Zero the required rows. If the "diagonal block" of the matrix 674 is square and the user wishes to set the diagonal we use separate 675 code so that MatSetValues() is not called for each diagonal allocating 676 new memory, thus calling lots of mallocs and slowing things down. 677 678 */ 679 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 680 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 681 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 682 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 683 } else if (diag != 0.0) { 684 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 685 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 686 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 687 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 688 } 689 for (i = 0; i < slen; i++) { 690 row = lrows[i] + rstart; 691 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 692 } 693 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 695 } else { 696 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 697 } 698 ierr = PetscFree(lrows);CHKERRQ(ierr); 699 700 /* wait on sends */ 701 if (nsends) { 702 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 703 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 704 ierr = PetscFree(send_status);CHKERRQ(ierr); 705 } 706 ierr = PetscFree(send_waits);CHKERRQ(ierr); 707 ierr = PetscFree(svalues);CHKERRQ(ierr); 708 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatMult_MPIAIJ" 714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscInt nt; 719 720 PetscFunctionBegin; 721 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 722 if (nt != A->cmap->n) { 723 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 724 } 725 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 727 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 734 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 735 { 736 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatMultAdd_MPIAIJ" 746 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 747 { 748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 753 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 754 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 755 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 761 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 762 { 763 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 764 PetscErrorCode ierr; 765 PetscTruth merged; 766 767 PetscFunctionBegin; 768 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 769 /* do nondiagonal part */ 770 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 771 if (!merged) { 772 /* send it on its way */ 773 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 774 /* do local part */ 775 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 776 /* receive remote parts: note this assumes the values are not actually */ 777 /* added in yy until the next line, */ 778 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 779 } else { 780 /* do local part */ 781 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 782 /* send it on its way */ 783 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 /* values actually were received in the Begin() but we need to call this nop */ 785 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 786 } 787 PetscFunctionReturn(0); 788 } 789 790 EXTERN_C_BEGIN 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 793 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 794 { 795 MPI_Comm comm; 796 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 797 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 798 IS Me,Notme; 799 PetscErrorCode ierr; 800 PetscInt M,N,first,last,*notme,i; 801 PetscMPIInt size; 802 803 PetscFunctionBegin; 804 805 /* Easy test: symmetric diagonal block */ 806 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 807 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 808 if (!*f) PetscFunctionReturn(0); 809 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 810 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 811 if (size == 1) PetscFunctionReturn(0); 812 813 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
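       Each process pulls out Amat(Me,Notme) (its own rows against the columns it does not own)
       and Bmat(Notme,Me) with MatGetSubMatrices(), then compares the two sequential pieces
       entrywise with MatIsTranspose().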
*/
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)
{ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 909 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 910 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 911 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 912 ierr = PetscFree(aij);CHKERRQ(ierr); 913 914 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 922 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 926 #undef __FUNCT__ 927 #define __FUNCT__ "MatView_MPIAIJ_Binary" 928 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 929 { 930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 931 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 932 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 933 PetscErrorCode ierr; 934 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 935 int fd; 936 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 937 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 938 PetscScalar *column_values; 939 940 PetscFunctionBegin; 941 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 942 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 943 nz = A->nz + B->nz; 944 if (!rank) { 945 header[0] = MAT_FILE_COOKIE; 946 header[1] = mat->rmap->N; 947 header[2] = mat->cmap->N; 948 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 949 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 950 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 951 /* get largest number of rows any processor has */ 952 rlen = mat->rmap->n; 953 range = mat->rmap->range; 954 for (i=1; i<size; i++) { 955 rlen = PetscMax(rlen,range[i+1] - range[i]); 956 } 957 } else { 958 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 959 rlen = mat->rmap->n; 960 } 961 962 /* load up the local row counts */ 963 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 964 for (i=0; i<mat->rmap->n; i++) { 965 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 966 } 967 968 /* store the row lengths to the file */ 969 if (!rank) { 970 MPI_Status status; 971 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 972 for (i=1; i<size; i++) { 973 rlen = range[i+1] - range[i]; 974 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 975 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 976 } 977 } else { 978 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 979 } 980 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 981 982 /* load up 
the local column indices: for each row we store the off-diagonal entries whose
     global column lies left of the diagonal block, then the diagonal-block entries,
     then the remaining off-diagonal entries, so the columns appear in increasing
     global order */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1066 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1067 if (iascii) { 1068 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1069 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1070 MatInfo info; 1071 PetscTruth inodes; 1072 1073 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1074 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1075 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1076 if (!inodes) { 1077 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1078 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1079 } else { 1080 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1081 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1082 } 1083 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1085 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1086 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1087 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1089 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1092 PetscInt inodecount,inodelimit,*inodes; 1093 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1094 if (inodes) { 1095 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1096 } else { 1097 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1098 } 1099 PetscFunctionReturn(0); 1100 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1101 PetscFunctionReturn(0); 1102 } 1103 } else if (isbinary) { 1104 if (size == 1) { 1105 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1106 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1107 } else { 1108 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1109 } 1110 PetscFunctionReturn(0); 1111 } else if (isdraw) { 1112 PetscDraw draw; 1113 PetscTruth isnull; 1114 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1115 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1116 } 1117 1118 if (size == 1) { 1119 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1120 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1121 } else { 1122 /* assemble the entire matrix onto first processor. 
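      This path is taken for ASCII, draw, and socket output: the parallel matrix is copied
      into a temporary MATMPIAIJ whose rows all live on process 0, and that copy's
      sequential diagonal block is then viewed through the singleton viewer.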
*/ 1123 Mat A; 1124 Mat_SeqAIJ *Aloc; 1125 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1126 MatScalar *a; 1127 1128 if (mat->rmap->N > 1024) { 1129 PetscTruth flg = PETSC_FALSE; 1130 1131 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1132 if (!flg) { 1133 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1134 } 1135 } 1136 1137 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1138 if (!rank) { 1139 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1140 } else { 1141 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1142 } 1143 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1144 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1145 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1146 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1147 1148 /* copy over the A part */ 1149 Aloc = (Mat_SeqAIJ*)aij->A->data; 1150 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1151 row = mat->rmap->rstart; 1152 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1153 for (i=0; i<m; i++) { 1154 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1155 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1156 } 1157 aj = Aloc->j; 1158 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1159 1160 /* copy over the B part */ 1161 Aloc = (Mat_SeqAIJ*)aij->B->data; 1162 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1163 row = mat->rmap->rstart; 1164 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1165 ct = cols; 1166 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1167 for (i=0; i<m; i++) { 1168 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1169 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1170 } 1171 ierr = PetscFree(ct);CHKERRQ(ierr); 1172 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1174 /* 1175 Everyone has to call to draw the matrix since the graphics waits are 1176 synchronized across all processors that share the PetscDraw object 1177 */ 1178 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1179 if (!rank) { 1180 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1181 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1184 ierr = MatDestroy(A);CHKERRQ(ierr); 1185 } 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatView_MPIAIJ" 1191 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1192 { 1193 PetscErrorCode ierr; 1194 PetscTruth iascii,isdraw,issocket,isbinary; 1195 1196 PetscFunctionBegin; 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1201 if (iascii || isdraw || isbinary || issocket) { 1202 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1203 } else 
{ 1204 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatSOR_MPIAIJ" 1211 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1212 { 1213 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1214 PetscErrorCode ierr; 1215 Vec bb1 = 0; 1216 PetscTruth hasop; 1217 1218 PetscFunctionBegin; 1219 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1220 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1221 } 1222 1223 if (flag == SOR_APPLY_UPPER) { 1224 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1229 if (flag & SOR_ZERO_INITIAL_GUESS) { 1230 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1231 its--; 1232 } 1233 1234 while (its--) { 1235 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1237 1238 /* update rhs: bb1 = bb - B*x */ 1239 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1240 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1241 1242 /* local sweep */ 1243 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1244 } 1245 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1246 if (flag & SOR_ZERO_INITIAL_GUESS) { 1247 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1248 its--; 1249 } 1250 while (its--) { 1251 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 1254 /* update rhs: bb1 = bb - B*x */ 1255 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1256 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1257 1258 /* local sweep */ 1259 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1260 } 1261 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1262 if (flag & SOR_ZERO_INITIAL_GUESS) { 1263 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1264 its--; 1265 } 1266 while (its--) { 1267 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 1270 /* update rhs: bb1 = bb - B*x */ 1271 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1272 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1273 1274 /* local sweep */ 1275 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1276 } 1277 } else if (flag & SOR_EISENSTAT) { 1278 Vec xx1; 1279 1280 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1281 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1282 1283 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 if (!mat->diag) { 1286 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1287 ierr = 
MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1288 } 1289 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1290 if (hasop) { 1291 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1292 } else { 1293 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1294 } 1295 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1296 1297 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1298 1299 /* local sweep */ 1300 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1301 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1302 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1303 } else { 1304 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1305 } 1306 1307 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatPermute_MPIAIJ" 1313 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1314 { 1315 MPI_Comm comm,pcomm; 1316 PetscInt first,local_size,nrows; 1317 const PetscInt *rows; 1318 PetscMPIInt size; 1319 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1324 /* make a collective version of 'rowp' */ 1325 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1326 if (pcomm==comm) { 1327 crowp = rowp; 1328 } else { 1329 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1330 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1331 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1332 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1333 } 1334 /* collect the global row permutation and invert it */ 1335 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1336 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1337 if (pcomm!=comm) { 1338 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1339 } 1340 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1341 /* get the local target indices */ 1342 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1344 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1345 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1346 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1347 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1348 /* the column permutation is so much easier; 1349 make a local version of 'colp' and invert it */ 1350 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1351 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1352 if (size==1) { 1353 lcolp = colp; 1354 } else { 1355 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1356 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1357 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1358 } 1359 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1360 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1361 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1362 if (size>1) { 1363 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1364 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1365 } 1366 /* now we just get the submatrix */ 1367 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1368 /* clean up */ 1369 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1370 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ 
"MatGetInfo_MPIAIJ" 1376 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1377 { 1378 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1379 Mat A = mat->A,B = mat->B; 1380 PetscErrorCode ierr; 1381 PetscReal isend[5],irecv[5]; 1382 1383 PetscFunctionBegin; 1384 info->block_size = 1.0; 1385 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1386 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1387 isend[3] = info->memory; isend[4] = info->mallocs; 1388 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1389 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1390 isend[3] += info->memory; isend[4] += info->mallocs; 1391 if (flag == MAT_LOCAL) { 1392 info->nz_used = isend[0]; 1393 info->nz_allocated = isend[1]; 1394 info->nz_unneeded = isend[2]; 1395 info->memory = isend[3]; 1396 info->mallocs = isend[4]; 1397 } else if (flag == MAT_GLOBAL_MAX) { 1398 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1399 info->nz_used = irecv[0]; 1400 info->nz_allocated = irecv[1]; 1401 info->nz_unneeded = irecv[2]; 1402 info->memory = irecv[3]; 1403 info->mallocs = irecv[4]; 1404 } else if (flag == MAT_GLOBAL_SUM) { 1405 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1406 info->nz_used = irecv[0]; 1407 info->nz_allocated = irecv[1]; 1408 info->nz_unneeded = irecv[2]; 1409 info->memory = irecv[3]; 1410 info->mallocs = irecv[4]; 1411 } 1412 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1413 info->fill_ratio_needed = 0; 1414 info->factor_mallocs = 0; 1415 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatSetOption_MPIAIJ" 1421 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1422 { 1423 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 switch (op) { 1428 case MAT_NEW_NONZERO_LOCATIONS: 1429 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1430 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1431 case MAT_KEEP_NONZERO_PATTERN: 1432 case MAT_NEW_NONZERO_LOCATION_ERR: 1433 case MAT_USE_INODES: 1434 case MAT_IGNORE_ZERO_ENTRIES: 1435 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1436 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1437 break; 1438 case MAT_ROW_ORIENTED: 1439 a->roworiented = flg; 1440 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1441 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1442 break; 1443 case MAT_NEW_DIAGONALS: 1444 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1445 break; 1446 case MAT_IGNORE_OFF_PROC_ENTRIES: 1447 a->donotstash = PETSC_TRUE; 1448 break; 1449 case MAT_SYMMETRIC: 1450 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1451 break; 1452 case MAT_STRUCTURALLY_SYMMETRIC: 1453 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1454 break; 1455 case MAT_HERMITIAN: 1456 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1457 break; 1458 case MAT_SYMMETRY_ETERNAL: 1459 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1460 break; 1461 default: 1462 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatGetRow_MPIAIJ" 1469 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1470 { 1471 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1472 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1473 PetscErrorCode ierr; 1474 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1475 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1476 PetscInt *cmap,*idx_p; 1477 1478 PetscFunctionBegin; 1479 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1480 mat->getrowactive = PETSC_TRUE; 1481 1482 if (!mat->rowvalues && (idx || v)) { 1483 /* 1484 allocate enough space to hold information from the longest row. 1485 */ 1486 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1487 PetscInt max = 1,tmp; 1488 for (i=0; i<matin->rmap->n; i++) { 1489 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1490 if (max < tmp) { max = tmp; } 1491 } 1492 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1493 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1494 } 1495 1496 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1497 lrow = row - rstart; 1498 1499 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1500 if (!v) {pvA = 0; pvB = 0;} 1501 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1502 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1503 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1504 nztot = nzA + nzB; 1505 1506 cmap = mat->garray; 1507 if (v || idx) { 1508 if (nztot) { 1509 /* Sort by increasing column numbers, assuming A and B already sorted */ 1510 PetscInt imark = -1; 1511 if (v) { 1512 *v = v_p = mat->rowvalues; 1513 for (i=0; i<nzB; i++) { 1514 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1515 else break; 1516 } 1517 imark = i; 1518 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1519 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1520 } 1521 if (idx) { 1522 *idx = idx_p = mat->rowindices; 1523 if (imark > -1) { 1524 for (i=0; i<imark; i++) { 1525 idx_p[i] = cmap[cworkB[i]]; 1526 } 1527 } else { 1528 for (i=0; i<nzB; i++) { 1529 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1530 else break; 1531 } 1532 imark = i; 1533 } 1534 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1535 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1536 } 1537 } else { 1538 if (idx) *idx = 0; 1539 if (v) *v = 0; 1540 } 1541 } 1542 *nz = nztot; 1543 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1544 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1550 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1551 { 1552 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1553 1554 PetscFunctionBegin; 1555 if (!aij->getrowactive) { 1556 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1557 } 1558 aij->getrowactive = PETSC_FALSE; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatNorm_MPIAIJ" 1564 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1565 { 1566 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1567 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1568 PetscErrorCode ierr; 1569 PetscInt i,j,cstart = mat->cmap->rstart; 1570 PetscReal sum = 0.0; 1571 MatScalar *v; 1572 1573 PetscFunctionBegin; 1574 if (aij->size == 1) { 1575 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1576 } else { 1577 if (type == NORM_FROBENIUS) { 1578 v = amat->a; 1579 for (i=0; i<amat->nz; i++) { 1580 
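/* accumulate |a_ij|^2 over the diagonal-block (A) entries; the complex branch below uses conj(v)*v so the running sum stays real */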
#if defined(PETSC_USE_COMPLEX) 1581 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1582 #else 1583 sum += (*v)*(*v); v++; 1584 #endif 1585 } 1586 v = bmat->a; 1587 for (i=0; i<bmat->nz; i++) { 1588 #if defined(PETSC_USE_COMPLEX) 1589 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1590 #else 1591 sum += (*v)*(*v); v++; 1592 #endif 1593 } 1594 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1595 *norm = sqrt(*norm); 1596 } else if (type == NORM_1) { /* max column norm */ 1597 PetscReal *tmp,*tmp2; 1598 PetscInt *jj,*garray = aij->garray; 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1600 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1601 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1602 *norm = 0.0; 1603 v = amat->a; jj = amat->j; 1604 for (j=0; j<amat->nz; j++) { 1605 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1606 } 1607 v = bmat->a; jj = bmat->j; 1608 for (j=0; j<bmat->nz; j++) { 1609 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1610 } 1611 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1612 for (j=0; j<mat->cmap->N; j++) { 1613 if (tmp2[j] > *norm) *norm = tmp2[j]; 1614 } 1615 ierr = PetscFree(tmp);CHKERRQ(ierr); 1616 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1617 } else if (type == NORM_INFINITY) { /* max row norm */ 1618 PetscReal ntemp = 0.0; 1619 for (j=0; j<aij->A->rmap->n; j++) { 1620 v = amat->a + amat->i[j]; 1621 sum = 0.0; 1622 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1623 sum += PetscAbsScalar(*v); v++; 1624 } 1625 v = bmat->a + bmat->i[j]; 1626 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1627 sum += PetscAbsScalar(*v); v++; 1628 } 1629 if (sum > ntemp) ntemp = sum; 1630 } 1631 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1632 } else { 1633 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1634 } 1635 } 1636 PetscFunctionReturn(0); 1637 } 1638 1639 #undef __FUNCT__ 1640 #define __FUNCT__ "MatTranspose_MPIAIJ" 1641 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1642 { 1643 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1644 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1645 PetscErrorCode ierr; 1646 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1647 PetscInt cstart=A->cmap->rstart,ncol; 1648 Mat B; 1649 MatScalar *array; 1650 1651 PetscFunctionBegin; 1652 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1653 1654 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1655 ai = Aloc->i; aj = Aloc->j; 1656 bi = Bloc->i; bj = Bloc->j; 1657 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1658 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1659 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1660 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1661 for (i=0; i<ai[ma]; i++){ 1662 d_nnz[aj[i]] ++; 1663 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1664 } 1665 1666 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1667 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1668 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1669 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1670 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1671 } else { 
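/* MAT_REUSE_MATRIX with an output matrix different from A: keep the existing preallocation of *matout and simply refill its values below */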
1672 B = *matout; 1673 } 1674 1675 /* copy over the A part */ 1676 array = Aloc->a; 1677 row = A->rmap->rstart; 1678 for (i=0; i<ma; i++) { 1679 ncol = ai[i+1]-ai[i]; 1680 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1681 row++; array += ncol; aj += ncol; 1682 } 1683 aj = Aloc->j; 1684 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1685 1686 /* copy over the B part */ 1687 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1688 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1689 array = Bloc->a; 1690 row = A->rmap->rstart; 1691 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1692 cols_tmp = cols; 1693 for (i=0; i<mb; i++) { 1694 ncol = bi[i+1]-bi[i]; 1695 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1696 row++; array += ncol; cols_tmp += ncol; 1697 } 1698 ierr = PetscFree(cols);CHKERRQ(ierr); 1699 1700 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1702 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1703 *matout = B; 1704 } else { 1705 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1706 } 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1712 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1715 Mat a = aij->A,b = aij->B; 1716 PetscErrorCode ierr; 1717 PetscInt s1,s2,s3; 1718 1719 PetscFunctionBegin; 1720 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1721 if (rr) { 1722 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1723 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1724 /* Overlap communication with computation. 
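The scatter of rr into aij->lvec is begun here and completed further below, after the diagonal block has been scaled, so the communication overlaps the local work.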
*/ 1725 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 } 1727 if (ll) { 1728 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1729 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1730 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1731 } 1732 /* scale the diagonal block */ 1733 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1734 1735 if (rr) { 1736 /* Do a scatter end and then right scale the off-diagonal block */ 1737 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1738 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1739 } 1740 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1746 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1747 { 1748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1749 PetscErrorCode ierr; 1750 1751 PetscFunctionBegin; 1752 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1753 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1759 { 1760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1761 PetscErrorCode ierr; 1762 1763 PetscFunctionBegin; 1764 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatEqual_MPIAIJ" 1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1771 { 1772 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1773 Mat a,b,c,d; 1774 PetscTruth flg; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 a = matA->A; b = matA->B; 1779 c = matB->A; d = matB->B; 1780 1781 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1782 if (flg) { 1783 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1784 } 1785 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatCopy_MPIAIJ" 1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1792 { 1793 PetscErrorCode ierr; 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1795 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 /* because of the column compression in the off-processor part of the matrix a->B, 1801 the number of columns in a->B and b->B may be different, hence we cannot call 1802 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1803 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1804 then copying the submatrices */ 1805 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1808 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1816 { 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #include "petscblaslapack.h" 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include 
<boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map.hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1967 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1968 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x,
&array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank; 2030 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 
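/* nzlocal = number of nonzeros owned by this process, counting both the diagonal block A and the off-diagonal block B */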
2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; recv_rank = send_rank + size; 2128 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2129 sbuf_j = redund->sbuf_j; 2130 sbuf_a = redund->sbuf_a; 2131 rbuf_j = redund->rbuf_j; 2132 rbuf_a = redund->rbuf_a; 2133 } 2134 2135 if (reuse == MAT_INITIAL_MATRIX){ 2136 PetscMPIInt subrank,subsize; 2137 PetscInt nleftover,np_subcomm; 2138 /* get the destination processors' id send_rank, nsends and nrecvs */ 2139 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2140 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2141 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2142 recv_rank = send_rank + size; 2143 np_subcomm = size/nsubcomm; 2144 nleftover = size - nsubcomm*np_subcomm; 2145 nsends = 0; nrecvs = 0; 2146 for (i=0; i<size; i++){ /* i=rank*/ 2147 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2148 send_rank[nsends] = i; nsends++; 2149 recv_rank[nrecvs++] = i; 2150 } 2151 } 2152 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2153 i = size-nleftover-1; 2154 j = 0; 2155 while (j < nsubcomm - nleftover){ 2156 send_rank[nsends++] = i; 2157 i--; j++; 2158 } 2159 } 2160 2161 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2162 for (i=0; i<nleftover; i++){ 2163 recv_rank[nrecvs++] = size-nleftover+i; 2164 } 2165 } 2166 2167 /* allocate sbuf_j, sbuf_a */ 2168 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2169 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2170 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2171 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2172 2173 /* copy mat's local entries into the buffers */ 2174 if (reuse == MAT_INITIAL_MATRIX){ 2175 rownz_max = 0; 2176 rptr = sbuf_j; 2177 cols = sbuf_j + rend-rstart + 1; 2178 vals = sbuf_a; 
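/* layout of sbuf_j: the first rend-rstart+1 entries are CSR-style row pointers (rptr), followed by the global column indices of all local rows; sbuf_a holds the matching values in the same order */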
2179 rptr[0] = 0; 2180 for (i=0; i<rend-rstart; i++){ 2181 row = i + rstart; 2182 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2183 ncols = nzA + nzB; 2184 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2185 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2186 /* load the column indices for this row into cols */ 2187 lwrite = 0; 2188 for (l=0; l<nzB; l++) { 2189 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2190 vals[lwrite] = aworkB[l]; 2191 cols[lwrite++] = ctmp; 2192 } 2193 } 2194 for (l=0; l<nzA; l++){ 2195 vals[lwrite] = aworkA[l]; 2196 cols[lwrite++] = cstart + cworkA[l]; 2197 } 2198 for (l=0; l<nzB; l++) { 2199 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2200 vals[lwrite] = aworkB[l]; 2201 cols[lwrite++] = ctmp; 2202 } 2203 } 2204 vals += ncols; 2205 cols += ncols; 2206 rptr[i+1] = rptr[i] + ncols; 2207 if (rownz_max < ncols) rownz_max = ncols; 2208 } 2209 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2210 } else { /* only copy matrix values into sbuf_a */ 2211 rptr = sbuf_j; 2212 vals = sbuf_a; 2213 rptr[0] = 0; 2214 for (i=0; i<rend-rstart; i++){ 2215 row = i + rstart; 2216 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2217 ncols = nzA + nzB; 2218 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2219 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2220 lwrite = 0; 2221 for (l=0; l<nzB; l++) { 2222 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2223 } 2224 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2225 for (l=0; l<nzB; l++) { 2226 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2227 } 2228 vals += ncols; 2229 rptr[i+1] = rptr[i] + ncols; 2230 } 2231 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2232 2233 /* send nzlocal to others, and recv other's nzlocal */ 2234 /*--------------------------------------------------*/ 2235 if (reuse == MAT_INITIAL_MATRIX){ 2236 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2237 s_waits2 = s_waits3 + nsends; 2238 s_waits1 = s_waits2 + nsends; 2239 r_waits1 = s_waits1 + nsends; 2240 r_waits2 = r_waits1 + nrecvs; 2241 r_waits3 = r_waits2 + nrecvs; 2242 } else { 2243 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2244 r_waits3 = s_waits3 + nsends; 2245 } 2246 2247 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2248 if (reuse == MAT_INITIAL_MATRIX){ 2249 /* get new tags to keep the communication clean */ 2250 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2252 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2253 rbuf_nz = sbuf_nz + nsends; 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 
2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 /*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for 
(imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->sbuf_nz = sbuf_nz; 2388 redund->sbuf_j = sbuf_j; 2389 redund->sbuf_a = sbuf_a; 2390 redund->rbuf_j = rbuf_j; 2391 redund->rbuf_a = rbuf_a; 2392 2393 redund->MatDestroy = C->ops->destroy; 2394 C->ops->destroy = MatDestroy_MatRedundant; 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2401 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2402 { 2403 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2404 PetscErrorCode ierr; 2405 PetscInt i,*idxb = 0; 2406 PetscScalar *va,*vb; 2407 Vec vtmp; 2408 2409 PetscFunctionBegin; 2410 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2411 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2412 if (idx) { 2413 for (i=0; i<A->rmap->n; i++) { 2414 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2415 } 2416 } 2417 2418 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2419 if (idx) { 2420 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2421 } 2422 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2423 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2424 2425 for (i=0; i<A->rmap->n; i++){ 2426 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2427 va[i] = vb[i]; 2428 if (idx) idx[i] = a->garray[idxb[i]]; 2429 } 2430 } 2431 2432 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2433 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2434 if (idxb) { 2435 ierr = PetscFree(idxb);CHKERRQ(ierr); 2436 } 2437 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 #undef __FUNCT__ 2442 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2443 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2444 { 2445 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2446 PetscErrorCode ierr; 2447 PetscInt i,*idxb = 0; 2448 PetscScalar *va,*vb; 2449 Vec vtmp; 2450 2451 PetscFunctionBegin; 2452 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2453 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2454 if (idx) { 2455 for (i=0; i<A->cmap->n; i++) { 2456 if (PetscAbsScalar(va[i])) idx[i] += 
A->cmap->rstart; 2457 } 2458 } 2459 2460 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2461 if (idx) { 2462 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2463 } 2464 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2465 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2466 2467 for (i=0; i<A->rmap->n; i++){ 2468 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2469 va[i] = vb[i]; 2470 if (idx) idx[i] = a->garray[idxb[i]]; 2471 } 2472 } 2473 2474 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2475 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2476 if (idxb) { 2477 ierr = PetscFree(idxb);CHKERRQ(ierr); 2478 } 2479 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2485 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2486 { 2487 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2488 PetscInt n = A->rmap->n; 2489 PetscInt cstart = A->cmap->rstart; 2490 PetscInt *cmap = mat->garray; 2491 PetscInt *diagIdx, *offdiagIdx; 2492 Vec diagV, offdiagV; 2493 PetscScalar *a, *diagA, *offdiagA; 2494 PetscInt r; 2495 PetscErrorCode ierr; 2496 2497 PetscFunctionBegin; 2498 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2499 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2500 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2501 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2502 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2503 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 for(r = 0; r < n; ++r) { 2507 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2508 a[r] = diagA[r]; 2509 idx[r] = cstart + diagIdx[r]; 2510 } else { 2511 a[r] = offdiagA[r]; 2512 idx[r] = cmap[offdiagIdx[r]]; 2513 } 2514 } 2515 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2516 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2517 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2518 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2519 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2520 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2526 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2527 { 2528 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2529 PetscInt n = A->rmap->n; 2530 PetscInt cstart = A->cmap->rstart; 2531 PetscInt *cmap = mat->garray; 2532 PetscInt *diagIdx, *offdiagIdx; 2533 Vec diagV, offdiagV; 2534 PetscScalar *a, *diagA, *offdiagA; 2535 PetscInt r; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2540 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2541 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2542 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2543 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2544 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2545 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2546 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2547 for(r = 0; r < n; ++r) { 2548 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2549 a[r] = diagA[r]; 2550 idx[r] = cstart + diagIdx[r]; 2551 } else { 2552 a[r] = 
offdiagA[r]; 2553 idx[r] = cmap[offdiagIdx[r]]; 2554 } 2555 } 2556 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2557 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2558 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2559 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2560 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2561 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2567 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2568 { 2569 PetscErrorCode ierr; 2570 Mat *dummy; 2571 2572 PetscFunctionBegin; 2573 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2574 *newmat = *dummy; 2575 ierr = PetscFree(dummy);CHKERRQ(ierr); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2580 /* -------------------------------------------------------------------*/ 2581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2582 MatGetRow_MPIAIJ, 2583 MatRestoreRow_MPIAIJ, 2584 MatMult_MPIAIJ, 2585 /* 4*/ MatMultAdd_MPIAIJ, 2586 MatMultTranspose_MPIAIJ, 2587 MatMultTransposeAdd_MPIAIJ, 2588 #ifdef PETSC_HAVE_PBGL 2589 MatSolve_MPIAIJ, 2590 #else 2591 0, 2592 #endif 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 #ifdef PETSC_HAVE_PBGL 2612 0, 2613 #else 2614 0, 2615 #endif 2616 0, 2617 0, 2618 /*29*/ MatSetUpPreallocation_MPIAIJ, 2619 #ifdef PETSC_HAVE_PBGL 2620 0, 2621 #else 2622 0, 2623 #endif 2624 0, 2625 0, 2626 0, 2627 /*34*/ MatDuplicate_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /*39*/ MatAXPY_MPIAIJ, 2633 MatGetSubMatrices_MPIAIJ, 2634 MatIncreaseOverlap_MPIAIJ, 2635 MatGetValues_MPIAIJ, 2636 MatCopy_MPIAIJ, 2637 /*44*/ MatGetRowMax_MPIAIJ, 2638 MatScale_MPIAIJ, 2639 0, 2640 0, 2641 0, 2642 /*49*/ MatSetBlockSize_MPIAIJ, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /*54*/ MatFDColoringCreate_MPIAIJ, 2648 0, 2649 MatSetUnfactored_MPIAIJ, 2650 MatPermute_MPIAIJ, 2651 0, 2652 /*59*/ MatGetSubMatrix_MPIAIJ, 2653 MatDestroy_MPIAIJ, 2654 MatView_MPIAIJ, 2655 0, 2656 0, 2657 /*64*/ 0, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2663 MatGetRowMinAbs_MPIAIJ, 2664 0, 2665 MatSetColoring_MPIAIJ, 2666 #if defined(PETSC_HAVE_ADIC) 2667 MatSetValuesAdic_MPIAIJ, 2668 #else 2669 0, 2670 #endif 2671 MatSetValuesAdifor_MPIAIJ, 2672 /*75*/ MatFDColoringApply_AIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*80*/ 0, 2678 0, 2679 0, 2680 /*83*/ MatLoad_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 0, 2686 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2687 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2688 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2689 MatPtAP_Basic, 2690 MatPtAPSymbolic_MPIAIJ, 2691 /*94*/ MatPtAPNumeric_MPIAIJ, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /*99*/ 0, 2697 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2698 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2699 MatConjugate_MPIAIJ, 2700 0, 2701 /*104*/MatSetValuesRow_MPIAIJ, 2702 MatRealPart_MPIAIJ, 2703 MatImaginaryPart_MPIAIJ, 2704 0, 2705 0, 2706 /*109*/0, 2707 MatGetRedundantMatrix_MPIAIJ, 2708 MatGetRowMin_MPIAIJ, 2709 0, 2710 0, 2711 
/*114*/MatGetSeqNonzerostructure_MPIAIJ, 2712 0, 2713 0, 2714 0, 2715 0, 2716 0 2717 }; 2718 2719 /* ----------------------------------------------------------------------------------------*/ 2720 2721 EXTERN_C_BEGIN 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2724 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2725 { 2726 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2731 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 EXTERN_C_END 2735 2736 EXTERN_C_BEGIN 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2739 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2740 { 2741 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2746 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 EXTERN_C_END 2750 2751 #include "petscpc.h" 2752 EXTERN_C_BEGIN 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2756 { 2757 Mat_MPIAIJ *b; 2758 PetscErrorCode ierr; 2759 PetscInt i; 2760 2761 PetscFunctionBegin; 2762 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2763 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2764 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2765 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2766 2767 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2768 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2771 if (d_nnz) { 2772 for (i=0; i<B->rmap->n; i++) { 2773 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2774 } 2775 } 2776 if (o_nnz) { 2777 for (i=0; i<B->rmap->n; i++) { 2778 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2779 } 2780 } 2781 b = (Mat_MPIAIJ*)B->data; 2782 2783 if (!B->preallocated) { 2784 /* Explicitly create 2 MATSEQAIJ matrices. 
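The diagonal block b->A is sized with the local column count B->cmap->n, while the off-diagonal block b->B is created with the full global column count B->cmap->N; its columns are compressed later during assembly (cf. the column-compression note in MatCopy_MPIAIJ() above).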
*/ 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2787 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2791 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2793 } 2794 2795 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2796 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2797 B->preallocated = PETSC_TRUE; 2798 PetscFunctionReturn(0); 2799 } 2800 EXTERN_C_END 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2805 { 2806 Mat mat; 2807 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 *newmat = 0; 2812 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2814 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2815 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2816 a = (Mat_MPIAIJ*)mat->data; 2817 2818 mat->factor = matin->factor; 2819 mat->rmap->bs = matin->rmap->bs; 2820 mat->assembled = PETSC_TRUE; 2821 mat->insertmode = NOT_SET_VALUES; 2822 mat->preallocated = PETSC_TRUE; 2823 2824 a->size = oldmat->size; 2825 a->rank = oldmat->rank; 2826 a->donotstash = oldmat->donotstash; 2827 a->roworiented = oldmat->roworiented; 2828 a->rowindices = 0; 2829 a->rowvalues = 0; 2830 a->getrowactive = PETSC_FALSE; 2831 2832 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2834 2835 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2836 if (oldmat->colmap) { 2837 #if defined (PETSC_USE_CTABLE) 2838 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2839 #else 2840 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2841 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2843 #endif 2844 } else a->colmap = 0; 2845 if (oldmat->garray) { 2846 PetscInt len; 2847 len = oldmat->B->cmap->n; 2848 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2849 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2850 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2851 } else a->garray = 0; 2852 2853 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2855 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2857 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2859 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2860 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2861 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2862 *newmat = mat; 2863 
PetscFunctionReturn(0); 2864 } 2865 2866 #include "petscsys.h" 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatLoad_MPIAIJ" 2870 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2871 { 2872 Mat A; 2873 PetscScalar *vals,*svals; 2874 MPI_Comm comm = ((PetscObject)viewer)->comm; 2875 MPI_Status status; 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2878 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2879 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2880 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2881 PetscInt cend,cstart,n,*rowners; 2882 int fd; 2883 2884 PetscFunctionBegin; 2885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2887 if (!rank) { 2888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2889 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2890 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2891 } 2892 2893 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2894 M = header[1]; N = header[2]; 2895 /* determine ownership of all rows */ 2896 m = M/size + ((M % size) > rank); 2897 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2898 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2899 2900 /* First process needs enough room for process with most rows */ 2901 if (!rank) { 2902 mmax = rowners[1]; 2903 for (i=2; i<size; i++) { 2904 mmax = PetscMax(mmax,rowners[i]); 2905 } 2906 } else mmax = m; 2907 2908 rowners[0] = 0; 2909 for (i=2; i<=size; i++) { 2910 rowners[i] += rowners[i-1]; 2911 } 2912 rstart = rowners[rank]; 2913 rend = rowners[rank+1]; 2914 2915 /* distribute row lengths to all processors */ 2916 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2917 if (!rank) { 2918 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2919 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2920 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2921 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2922 for (j=0; j<m; j++) { 2923 procsnz[0] += ourlens[j]; 2924 } 2925 for (i=1; i<size; i++) { 2926 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2927 /* calculate the number of nonzeros on each processor */ 2928 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2929 procsnz[i] += rowlengths[j]; 2930 } 2931 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2932 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2933 } 2934 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2935 } else { 2936 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2937 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2938 } 2939 2940 if (!rank) { 2941 /* determine max buffer needed and allocate it */ 2942 maxnz = 0; 2943 for (i=0; i<size; i++) { 2944 maxnz = PetscMax(maxnz,procsnz[i]); 2945 } 2946 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2947 2948 /* read in my part of the matrix column indices */ 2949 nz = procsnz[0]; 2950 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2951 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2952 2953 /* read in every one elses and ship off */ 2954 for (i=1; i<size; i++) { 2955 nz = procsnz[i]; 2956 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2957 mpicnt = PetscMPIIntCast(nz); 2958 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2959 } 2960 ierr = PetscFree(cols);CHKERRQ(ierr); 2961 } else { 2962 /* determine buffer space needed for message */ 2963 nz = 0; 2964 for (i=0; i<m; i++) { 2965 nz += ourlens[i]; 2966 } 2967 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2968 2969 /* receive message of column indices*/ 2970 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2971 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2972 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2973 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2974 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2975 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2976 } 2977 2978 /* determine column ownership if matrix is not square */ 2979 if (N != M) { 2980 n = N/size + ((N % size) > rank); 2981 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2982 cstart = cend - n; 2983 } else { 2984 cstart = rstart; 2985 cend = rend; 2986 n = cend - cstart; 2987 } 2988 2989 /* loop over local rows, determining number of off diagonal entries */ 2990 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2991 jj = 0; 2992 for (i=0; i<m; i++) { 2993 for (j=0; j<ourlens[i]; j++) { 2994 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2995 jj++; 2996 } 2997 } 2998 2999 /* create our matrix */ 3000 for (i=0; i<m; i++) { 3001 ourlens[i] -= offlens[i]; 3002 } 3003 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3004 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3005 ierr = MatSetType(A,type);CHKERRQ(ierr); 3006 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3007 3008 for (i=0; i<m; i++) { 3009 ourlens[i] += offlens[i]; 3010 } 3011 3012 if (!rank) { 3013 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3014 3015 /* read in my part of the matrix numerical values */ 3016 nz = procsnz[0]; 3017 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3018 3019 /* insert into matrix */ 3020 jj = rstart; 3021 smycols = mycols; 3022 svals = vals; 3023 for (i=0; i<m; i++) { 3024 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3025 smycols += ourlens[i]; 3026 svals += ourlens[i]; 3027 jj++; 3028 } 3029 3030 /* read in other processors and ship out */ 3031 for (i=1; i<size; i++) { 3032 nz = procsnz[i]; 3033 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3034 mpicnt = PetscMPIIntCast(nz); 3035 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3036 } 3037 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3038 } else { 3039 /* receive numeric values */ 3040 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3041 3042 /* receive message of values*/ 3043 mpicnt = PetscMPIIntCast(nz); 3044 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3045 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3046 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3047 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible 
negative value %d, expected %d",mpimaxnz,mpicnt);} 3048 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3049 3050 /* insert into matrix */ 3051 jj = rstart; 3052 smycols = mycols; 3053 svals = vals; 3054 for (i=0; i<m; i++) { 3055 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 smycols += ourlens[i]; 3057 svals += ourlens[i]; 3058 jj++; 3059 } 3060 } 3061 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3062 ierr = PetscFree(vals);CHKERRQ(ierr); 3063 ierr = PetscFree(mycols);CHKERRQ(ierr); 3064 ierr = PetscFree(rowners);CHKERRQ(ierr); 3065 3066 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 *newmat = A; 3069 PetscFunctionReturn(0); 3070 } 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3075 { 3076 PetscErrorCode ierr; 3077 IS iscol_local; 3078 PetscInt csize; 3079 3080 PetscFunctionBegin; 3081 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3082 if (call == MAT_REUSE_MATRIX) { 3083 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3084 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3085 } else { 3086 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3087 } 3088 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3089 if (call == MAT_INITIAL_MATRIX) { 3090 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3091 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3092 } 3093 PetscFunctionReturn(0); 3094 } 3095 3096 #undef __FUNCT__ 3097 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3098 /* 3099 Not great since it makes two copies of the submatrix, first an SeqAIJ 3100 in local and then by concatenating the local matrices the end result. 3101 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3102 3103 Note: This requires a sequential iscol with all indices. 
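MatGetSubMatrix_MPIAIJ() above produces such an iscol by calling ISAllGather() on the parallel column IS and caches the result on the new matrix under the name "ISAllGather" so that MAT_REUSE_MATRIX calls can retrieve it. A rough sketch of the calling sequence (names as in MatGetSubMatrix_MPIAIJ() above):
      ISAllGather(iscol,&iscol_local);
      MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);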
3104 */ 3105 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3106 { 3107 PetscErrorCode ierr; 3108 PetscMPIInt rank,size; 3109 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3110 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3111 Mat *local,M,Mreuse; 3112 MatScalar *vwork,*aa; 3113 MPI_Comm comm = ((PetscObject)mat)->comm; 3114 Mat_SeqAIJ *aij; 3115 3116 3117 PetscFunctionBegin; 3118 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3119 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3120 3121 if (call == MAT_REUSE_MATRIX) { 3122 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3123 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3124 local = &Mreuse; 3125 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3126 } else { 3127 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3128 Mreuse = *local; 3129 ierr = PetscFree(local);CHKERRQ(ierr); 3130 } 3131 3132 /* 3133 m - number of local rows 3134 n - number of columns (same on all processors) 3135 rstart - first row in new global matrix generated 3136 */ 3137 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3138 if (call == MAT_INITIAL_MATRIX) { 3139 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3140 ii = aij->i; 3141 jj = aij->j; 3142 3143 /* 3144 Determine the number of non-zeros in the diagonal and off-diagonal 3145 portions of the matrix in order to do correct preallocation 3146 */ 3147 3148 /* first get start and end of "diagonal" columns */ 3149 if (csize == PETSC_DECIDE) { 3150 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3151 if (mglobal == n) { /* square matrix */ 3152 nlocal = m; 3153 } else { 3154 nlocal = n/size + ((n % size) > rank); 3155 } 3156 } else { 3157 nlocal = csize; 3158 } 3159 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3160 rstart = rend - nlocal; 3161 if (rank == size - 1 && rend != n) { 3162 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3163 } 3164 3165 /* next, compute all the lengths */ 3166 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3167 olens = dlens + m; 3168 for (i=0; i<m; i++) { 3169 jend = ii[i+1] - ii[i]; 3170 olen = 0; 3171 dlen = 0; 3172 for (j=0; j<jend; j++) { 3173 if (*jj < rstart || *jj >= rend) olen++; 3174 else dlen++; 3175 jj++; 3176 } 3177 olens[i] = olen; 3178 dlens[i] = dlen; 3179 } 3180 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3181 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3182 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3183 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3184 ierr = PetscFree(dlens);CHKERRQ(ierr); 3185 } else { 3186 PetscInt ml,nl; 3187 3188 M = *newmat; 3189 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3190 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3191 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3192 /* 3193 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3194 rather than the slower MatSetValues(). 
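(Roughly speaking, marking the matrix as previously assembled but not currently assembled lets the insertion loop below reuse the already-built parallel data structures of the existing matrix rather than rebuilding them; this is an internal detail of the MPIAIJ implementation, not part of the public API.)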
3195 */ 3196 M->was_assembled = PETSC_TRUE; 3197 M->assembled = PETSC_FALSE; 3198 } 3199 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3200 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3201 ii = aij->i; 3202 jj = aij->j; 3203 aa = aij->a; 3204 for (i=0; i<m; i++) { 3205 row = rstart + i; 3206 nz = ii[i+1] - ii[i]; 3207 cwork = jj; jj += nz; 3208 vwork = aa; aa += nz; 3209 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3210 } 3211 3212 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3213 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3214 *newmat = M; 3215 3216 /* save the submatrix used on this processor for the next request */ 3217 if (call == MAT_INITIAL_MATRIX) { 3218 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3219 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3220 } 3221 3222 PetscFunctionReturn(0); 3223 } 3224 3225 EXTERN_C_BEGIN 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3228 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3229 { 3230 PetscInt m,cstart,cend,j,nnz,i,d; 3231 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3232 const PetscInt *JJ; 3233 PetscScalar *values; 3234 PetscErrorCode ierr; 3235 3236 PetscFunctionBegin; 3237 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3238 3239 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3240 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3241 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3243 m = B->rmap->n; 3244 cstart = B->cmap->rstart; 3245 cend = B->cmap->rend; 3246 rstart = B->rmap->rstart; 3247 3248 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3249 o_nnz = d_nnz + m; 3250 3251 #if defined(PETSC_USE_DEBUG) 3252 for (i=0; i<m; i++) { 3253 nnz = Ii[i+1]- Ii[i]; 3254 JJ = J + Ii[i]; 3255 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3256 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3257 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3258 } 3259 #endif 3260 3261 for (i=0; i<m; i++) { 3262 nnz = Ii[i+1]- Ii[i]; 3263 JJ = J + Ii[i]; 3264 nnz_max = PetscMax(nnz_max,nnz); 3265 d = 0; 3266 for (j=0; j<nnz; j++) { 3267 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3268 } 3269 d_nnz[i] = d; 3270 o_nnz[i] = nnz - d; 3271 } 3272 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3273 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3274 3275 if (v) values = (PetscScalar*)v; 3276 else { 3277 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3278 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3279 } 3280 3281 for (i=0; i<m; i++) { 3282 ii = i + rstart; 3283 nnz = Ii[i+1]- Ii[i]; 3284 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3285 } 3286 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3288 3289 if (!v) { 3290 ierr = PetscFree(values);CHKERRQ(ierr); 3291 } 3292 PetscFunctionReturn(0); 3293 } 3294 EXTERN_C_END 3295 3296 #undef __FUNCT__ 3297 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3298 /*@ 3299 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3300 (the default parallel PETSc format). 3301 3302 Collective on MPI_Comm 3303 3304 Input Parameters: 3305 + B - the matrix 3306 . i - the indices into j for the start of each local row (starts with zero) 3307 . j - the column indices for each local row (starts with zero) 3308 - v - optional values in the matrix 3309 3310 Level: developer 3311 3312 Notes: 3313 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3314 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3315 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3316 3317 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3318 3319 The format used for the sparse matrix input is equivalent to a 3320 row-major ordering, i.e., for the following matrix, the input data expected is 3321 as shown: 3322 3323 1 0 0 3324 2 0 3 P0 3325 ------- 3326 4 5 6 P1 3327 3328 Process0 [P0]: rows_owned=[0,1] 3329 i = {0,1,3} [size = nrow+1 = 2+1] 3330 j = {0,0,2} [size = nz = 3] 3331 v = {1,2,3} [size = nz = 3] 3332 3333 Process1 [P1]: rows_owned=[2] 3334 i = {0,3} [size = nrow+1 = 1+1] 3335 j = {0,1,2} [size = nz = 3] 3336 v = {4,5,6} [size = nz = 3] 3337 3338 .keywords: matrix, aij, compressed row, sparse, parallel 3339 3340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3341 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3342 @*/ 3343 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3344 { 3345 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3346 3347 PetscFunctionBegin; 3348 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3349 if (f) { 3350 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3351 } 3352 PetscFunctionReturn(0); 3353 } 3354 3355 #undef __FUNCT__ 3356 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3357 /*@C 3358 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3359 (the default parallel PETSc format). For good matrix assembly performance 3360 the user should preallocate the matrix storage by setting the parameters 3361 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3362 performance can be increased by more than a factor of 50. 3363 3364 Collective on MPI_Comm 3365 3366 Input Parameters: 3367 + B - the matrix 3368 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3369 (same value is used for all local rows) 3370 . d_nnz - array containing the number of nonzeros in the various rows of the 3371 DIAGONAL portion of the local submatrix (possibly different for each row) 3372 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3373 The size of this array is equal to the number of local rows, i.e 'm'. 
3374 You must leave room for the diagonal entry even if it is zero. 3375 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3376 submatrix (same value is used for all local rows). 3377 - o_nnz - array containing the number of nonzeros in the various rows of the 3378 OFF-DIAGONAL portion of the local submatrix (possibly different for 3379 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3380 structure. The size of this array is equal to the number 3381 of local rows, i.e 'm'. 3382 3383 If the *_nnz parameter is given then the *_nz parameter is ignored. 3384 3385 The AIJ format (also called the Yale sparse matrix format or 3386 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3387 storage. The stored row and column indices begin with zero. See the users manual for details. 3388 3389 The parallel matrix is partitioned such that the first m0 rows belong to 3390 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3391 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3392 3393 The DIAGONAL portion of the local submatrix of a processor can be defined 3394 as the submatrix which is obtained by extracting the part corresponding 3395 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3396 first row that belongs to the processor, and r2 is the last row belonging 3397 to this processor. This is a square mxm matrix. The remaining portion 3398 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3399 3400 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3401 3402 You can call MatGetInfo() to get information on how effective the preallocation was; 3403 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3404 You can also run with the option -info and look for messages with the string 3405 malloc in them to see if additional memory allocation was needed. 3406 3407 Example usage: 3408 3409 Consider the following 8x8 matrix with 34 non-zero values, that is 3410 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3411 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3412 as follows: 3413 3414 .vb 3415 1 2 0 | 0 3 0 | 0 4 3416 Proc0 0 5 6 | 7 0 0 | 8 0 3417 9 0 10 | 11 0 0 | 12 0 3418 ------------------------------------- 3419 13 0 14 | 15 16 17 | 0 0 3420 Proc1 0 18 0 | 19 20 21 | 0 0 3421 0 0 0 | 22 23 0 | 24 0 3422 ------------------------------------- 3423 Proc2 25 26 27 | 0 0 28 | 29 0 3424 30 0 0 | 31 32 33 | 0 34 3425 .ve 3426 3427 This can be represented as a collection of submatrices as: 3428 3429 .vb 3430 A B C 3431 D E F 3432 G H I 3433 .ve 3434 3435 Where the submatrices A,B,C are owned by proc0, D,E,F are 3436 owned by proc1, G,H,I are owned by proc2. 3437 3438 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3439 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3440 The 'M','N' parameters are 8,8, and have the same values on all procs. 3441 3442 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3443 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3444 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3445 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3446 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3447 matrix, and [DF] as another SeqAIJ matrix. 
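As a sketch of the typical calling sequence (variable names here are illustrative and error checking is abbreviated):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve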
3448 3449 When d_nz, o_nz parameters are specified, d_nz storage elements are 3450 allocated for every row of the local diagonal submatrix, and o_nz 3451 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3452 One way to choose d_nz and o_nz is to use the max nonzeros per local 3453 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3454 In this case, the values of d_nz,o_nz are: 3455 .vb 3456 proc0 : d_nz = 2, o_nz = 2 3457 proc1 : d_nz = 3, o_nz = 2 3458 proc2 : d_nz = 1, o_nz = 4 3459 .ve 3460 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3461 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3462 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3463 34 values. 3464 3465 When d_nnz, o_nnz parameters are specified, the storage is specified 3466 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3467 In the above case the values for d_nnz,o_nnz are: 3468 .vb 3469 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3470 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3471 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3472 .ve 3473 Here the space allocated is the sum of all the above values, i.e. 34, and 3474 hence the preallocation is perfect. 3475 3476 Level: intermediate 3477 3478 .keywords: matrix, aij, compressed row, sparse, parallel 3479 3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3481 MPIAIJ, MatGetInfo() 3482 @*/ 3483 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3484 { 3485 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3486 3487 PetscFunctionBegin; 3488 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3489 if (f) { 3490 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3491 } 3492 PetscFunctionReturn(0); 3493 } 3494 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3497 /*@ 3498 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3499 in standard CSR format. 3500 3501 Collective on MPI_Comm 3502 3503 Input Parameters: 3504 + comm - MPI communicator 3505 . m - number of local rows (cannot be PETSC_DECIDE) 3506 . n - This value should be the same as the local size used in creating the 3507 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it 3508 calculated if N is given) For square matrices n is almost always m. 3509 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3510 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3511 . i - row indices 3512 . j - column indices 3513 - a - matrix values 3514 3515 Output Parameter: 3516 . mat - the matrix 3517 3518 Level: intermediate 3519 3520 Notes: 3521 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3522 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3523 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3524 3525 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3526 3527 The format used for the sparse matrix input is equivalent to a 3528 row-major ordering; 
i.e., for the following matrix, the input data expected is 3529 as shown: 3530 3531 1 0 0 3532 2 0 3 P0 3533 ------- 3534 4 5 6 P1 3535 3536 Process0 [P0]: rows_owned=[0,1] 3537 i = {0,1,3} [size = nrow+1 = 2+1] 3538 j = {0,0,2} [size = nz = 3] 3539 v = {1,2,3} [size = nz = 3] 3540 3541 Process1 [P1]: rows_owned=[2] 3542 i = {0,3} [size = nrow+1 = 1+1] 3543 j = {0,1,2} [size = nz = 3] 3544 v = {4,5,6} [size = nz = 3] 3545 3546 .keywords: matrix, aij, compressed row, sparse, parallel 3547 3548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3549 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3550 @*/ 3551 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3552 { 3553 PetscErrorCode ierr; 3554 3555 PetscFunctionBegin; 3556 if (i[0]) { 3557 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3558 } 3559 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3560 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3561 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3562 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3563 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3564 PetscFunctionReturn(0); 3565 } 3566 3567 #undef __FUNCT__ 3568 #define __FUNCT__ "MatCreateMPIAIJ" 3569 /*@C 3570 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3571 (the default parallel PETSc format). For good matrix assembly performance 3572 the user should preallocate the matrix storage by setting the parameters 3573 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3574 performance can be increased by more than a factor of 50. 3575 3576 Collective on MPI_Comm 3577 3578 Input Parameters: 3579 + comm - MPI communicator 3580 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3581 This value should be the same as the local size used in creating the 3582 y vector for the matrix-vector product y = Ax. 3583 . n - This value should be the same as the local size used in creating the 3584 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it 3585 calculated if N is given) For square matrices n is almost always m. 3586 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3587 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3588 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3589 (same value is used for all local rows) 3590 . d_nnz - array containing the number of nonzeros in the various rows of the 3591 DIAGONAL portion of the local submatrix (possibly different for each row) 3592 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3593 The size of this array is equal to the number of local rows, i.e 'm'. 3594 You must leave room for the diagonal entry even if it is zero. 3595 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3596 submatrix (same value is used for all local rows). 3597 - o_nnz - array containing the number of nonzeros in the various rows of the 3598 OFF-DIAGONAL portion of the local submatrix (possibly different for 3599 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3600 structure. The size of this array is equal to the number 3601 of local rows, i.e 'm'. 
3602 3603 Output Parameter: 3604 . A - the matrix 3605 3606 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3607 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3608 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3609 3610 Notes: 3611 If the *_nnz parameter is given then the *_nz parameter is ignored. 3612 3613 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3614 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3615 storage requirements for this matrix. 3616 3617 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3618 processor then it must be used on all processors that share the object for 3619 that argument. 3620 3621 The user MUST specify either the local or global matrix dimensions 3622 (possibly both). 3623 3624 The parallel matrix is partitioned across processors such that the 3625 first m0 rows belong to process 0, the next m1 rows belong to 3626 process 1, the next m2 rows belong to process 2, etc., where 3627 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 3628 values corresponding to an [m x N] submatrix. 3629 3630 The columns are logically partitioned with the n0 columns belonging 3631 to the 0th partition, the next n1 columns belonging to the next 3632 partition, etc., where n0,n1,n2... are the input parameter 'n'. 3633 3634 The DIAGONAL portion of the local submatrix on any given processor 3635 is the submatrix corresponding to the rows and columns m,n 3636 corresponding to the given processor, i.e. the diagonal matrix on 3637 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 3638 etc. The remaining portion of the local submatrix [m x (N-n)] 3639 constitutes the OFF-DIAGONAL portion. The example below better 3640 illustrates this concept. 3641 3642 For a square global matrix we define each processor's diagonal portion 3643 to be its local rows and the corresponding columns (a square submatrix); 3644 each processor's off-diagonal portion encompasses the remainder of the 3645 local matrix (a rectangular submatrix). 3646 3647 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3648 3649 When calling this routine with a single process communicator, a matrix of 3650 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3651 type of communicator, use the construction mechanism: 3652 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3653 3654 By default, this format uses inodes (identical nodes) when possible. 3655 We search for consecutive rows with the same nonzero structure, thereby 3656 reusing matrix information to achieve increased efficiency. 3657 3658 Options Database Keys: 3659 + -mat_no_inode - Do not use inodes 3660 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3661 - -mat_aij_oneindex - Internally use indexing starting at 1 3662 rather than 0. Note that when calling MatSetValues(), 3663 the user still MUST index entries starting at 0! 3664 3665 3666 Example usage: 3667 3668 Consider the following 8x8 matrix with 34 non-zero values, that is 3669 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3670 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3671 as follows: 3672 3673 .vb 3674 1 2 0 | 0 3 0 | 0 4 3675 Proc0 0 5 6 | 7 0 0 | 8 0 3676 9 0 10 | 11 0 0 | 12 0 3677 ------------------------------------- 3678 13 0 14 | 15 16 17 | 0 0 3679 Proc1 0 18 0 | 19 20 21 | 0 0 3680 0 0 0 | 22 23 0 | 24 0 3681 ------------------------------------- 3682 Proc2 25 26 27 | 0 0 28 | 29 0 3683 30 0 0 | 31 32 33 | 0 34 3684 .ve 3685 3686 This can be represented as a collection of submatrices as: 3687 3688 .vb 3689 A B C 3690 D E F 3691 G H I 3692 .ve 3693 3694 Where the submatrices A,B,C are owned by proc0, D,E,F are 3695 owned by proc1, G,H,I are owned by proc2. 3696 3697 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3698 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3699 The 'M','N' parameters are 8,8, and have the same values on all procs. 3700 3701 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3702 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3703 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3704 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3705 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3706 matrix, and [DF] as another SeqAIJ matrix. 3707 3708 When d_nz, o_nz parameters are specified, d_nz storage elements are 3709 allocated for every row of the local diagonal submatrix, and o_nz 3710 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3711 One way to choose d_nz and o_nz is to use the max nonzeros per local 3712 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3713 In this case, the values of d_nz,o_nz are: 3714 .vb 3715 proc0 : d_nz = 2, o_nz = 2 3716 proc1 : d_nz = 3, o_nz = 2 3717 proc2 : d_nz = 1, o_nz = 4 3718 .ve 3719 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3720 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3721 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3722 34 values. 3723 3724 When d_nnz, o_nnz parameters are specified, the storage is specified 3725 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3726 In the above case the values for d_nnz,o_nnz are: 3727 .vb 3728 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3729 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3730 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3731 .ve 3732 Here the space allocated is the sum of all the above values, i.e. 34, and 3733 hence the preallocation is perfect. 
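As an illustrative sketch only (each process passes its own values), the call made by proc1 in the example above could look like:
.vb
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve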
3734 3735 Level: intermediate 3736 3737 .keywords: matrix, aij, compressed row, sparse, parallel 3738 3739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3740 MPIAIJ, MatCreateMPIAIJWithArrays() 3741 @*/ 3742 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3743 { 3744 PetscErrorCode ierr; 3745 PetscMPIInt size; 3746 3747 PetscFunctionBegin; 3748 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3749 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3750 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3751 if (size > 1) { 3752 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3753 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3754 } else { 3755 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3756 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3757 } 3758 PetscFunctionReturn(0); 3759 } 3760 3761 #undef __FUNCT__ 3762 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3763 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3764 { 3765 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3766 3767 PetscFunctionBegin; 3768 *Ad = a->A; 3769 *Ao = a->B; 3770 *colmap = a->garray; 3771 PetscFunctionReturn(0); 3772 } 3773 3774 #undef __FUNCT__ 3775 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3776 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3777 { 3778 PetscErrorCode ierr; 3779 PetscInt i; 3780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3781 3782 PetscFunctionBegin; 3783 if (coloring->ctype == IS_COLORING_GLOBAL) { 3784 ISColoringValue *allcolors,*colors; 3785 ISColoring ocoloring; 3786 3787 /* set coloring for diagonal portion */ 3788 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3789 3790 /* set coloring for off-diagonal portion */ 3791 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3792 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3793 for (i=0; i<a->B->cmap->n; i++) { 3794 colors[i] = allcolors[a->garray[i]]; 3795 } 3796 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3797 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3798 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3799 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3800 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3801 ISColoringValue *colors; 3802 PetscInt *larray; 3803 ISColoring ocoloring; 3804 3805 /* set coloring for diagonal portion */ 3806 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3807 for (i=0; i<a->A->cmap->n; i++) { 3808 larray[i] = i + A->cmap->rstart; 3809 } 3810 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3811 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3812 for (i=0; i<a->A->cmap->n; i++) { 3813 colors[i] = coloring->colors[larray[i]]; 3814 } 3815 ierr = PetscFree(larray);CHKERRQ(ierr); 3816 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3817 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3818 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3819 3820 /* set coloring for off-diagonal portion */ 3821 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3822 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3823 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3824 for (i=0; i<a->B->cmap->n; i++) { 3825 colors[i] = coloring->colors[larray[i]]; 3826 } 3827 ierr = PetscFree(larray);CHKERRQ(ierr); 3828 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3829 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3830 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3831 } else { 3832 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3833 } 3834 3835 PetscFunctionReturn(0); 3836 } 3837 3838 #if defined(PETSC_HAVE_ADIC) 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3841 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3842 { 3843 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3844 PetscErrorCode ierr; 3845 3846 PetscFunctionBegin; 3847 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3848 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3849 PetscFunctionReturn(0); 3850 } 3851 #endif 3852 3853 #undef __FUNCT__ 3854 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3855 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3856 { 3857 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3858 PetscErrorCode ierr; 3859 3860 PetscFunctionBegin; 3861 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3862 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3863 PetscFunctionReturn(0); 3864 } 3865 3866 #undef __FUNCT__ 3867 #define __FUNCT__ "MatMerge" 3868 /*@ 3869 MatMerge - Creates a single large PETSc matrix by concatenating sequential 3870 matrices from each processor 3871 3872 Collective on MPI_Comm 3873 3874 Input Parameters: 3875 + comm - the communicator the parallel matrix will live on 3876 . inmat - the input sequential matrices 3877 . n - number of local columns (or PETSC_DECIDE) 3878 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3879 3880 Output Parameter: 3881 . outmat - the parallel matrix generated 3882 3883 Level: advanced 3884 3885 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
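A sketch of typical usage (names are illustrative; note that the current implementation destroys inmat as part of the merge):
.vb
      Mat seqA;   /* SeqAIJ matrix built on each process */
      Mat mpiA;   /* the resulting parallel matrix       */
      ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
.ve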
3886 3887 @*/ 3888 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3889 { 3890 PetscErrorCode ierr; 3891 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3892 PetscInt *indx; 3893 PetscScalar *values; 3894 3895 PetscFunctionBegin; 3896 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3897 if (scall == MAT_INITIAL_MATRIX){ 3898 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3899 if (n == PETSC_DECIDE){ 3900 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3901 } 3902 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3903 rstart -= m; 3904 3905 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3906 for (i=0;i<m;i++) { 3907 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3908 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3909 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3910 } 3911 /* This routine will ONLY return MPIAIJ type matrix */ 3912 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3913 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3914 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3915 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3916 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3917 3918 } else if (scall == MAT_REUSE_MATRIX){ 3919 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3920 } else { 3921 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3922 } 3923 3924 for (i=0;i<m;i++) { 3925 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3926 Ii = i + rstart; 3927 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3928 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3929 } 3930 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3931 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 3934 PetscFunctionReturn(0); 3935 } 3936 3937 #undef __FUNCT__ 3938 #define __FUNCT__ "MatFileSplit" 3939 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3940 { 3941 PetscErrorCode ierr; 3942 PetscMPIInt rank; 3943 PetscInt m,N,i,rstart,nnz; 3944 size_t len; 3945 const PetscInt *indx; 3946 PetscViewer out; 3947 char *name; 3948 Mat B; 3949 const PetscScalar *values; 3950 3951 PetscFunctionBegin; 3952 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3953 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3954 /* Should this be the type of the diagonal block of A? 
*/ 3955 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3956 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3957 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3958 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3959 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3960 for (i=0;i<m;i++) { 3961 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3962 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3963 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3964 } 3965 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3966 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3967 3968 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3969 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3970 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3971 sprintf(name,"%s.%d",outfile,rank); 3972 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3973 ierr = PetscFree(name); 3974 ierr = MatView(B,out);CHKERRQ(ierr); 3975 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3976 ierr = MatDestroy(B);CHKERRQ(ierr); 3977 PetscFunctionReturn(0); 3978 } 3979 3980 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3981 #undef __FUNCT__ 3982 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3983 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3984 { 3985 PetscErrorCode ierr; 3986 Mat_Merge_SeqsToMPI *merge; 3987 PetscContainer container; 3988 3989 PetscFunctionBegin; 3990 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3991 if (container) { 3992 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4003 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4004 4005 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4006 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4007 } 4008 ierr = PetscFree(merge);CHKERRQ(ierr); 4009 4010 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4011 PetscFunctionReturn(0); 4012 } 4013 4014 #include "../src/mat/utils/freespace.h" 4015 #include "petscbt.h" 4016 4017 #undef __FUNCT__ 4018 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4019 /*@C 4020 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4021 matrices from each processor 4022 4023 Collective on MPI_Comm 4024 4025 Input Parameters: 4026 + comm - the communicators the parallel matrix will live on 4027 . seqmat - the input sequential matrices 4028 . m - number of local rows (or PETSC_DECIDE) 4029 . n - number of local columns (or PETSC_DECIDE) 4030 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4031 4032 Output Parameter: 4033 . mpimat - the parallel matrix generated 4034 4035 Level: advanced 4036 4037 Notes: 4038 The dimensions of the sequential matrix in each processor MUST be the same. 4039 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4040 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4041 @*/ 4042 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4043 { 4044 PetscErrorCode ierr; 4045 MPI_Comm comm=((PetscObject)mpimat)->comm; 4046 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4047 PetscMPIInt size,rank,taga,*len_s; 4048 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4049 PetscInt proc,m; 4050 PetscInt **buf_ri,**buf_rj; 4051 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4052 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4053 MPI_Request *s_waits,*r_waits; 4054 MPI_Status *status; 4055 MatScalar *aa=a->a; 4056 MatScalar **abuf_r,*ba_i; 4057 Mat_Merge_SeqsToMPI *merge; 4058 PetscContainer container; 4059 4060 PetscFunctionBegin; 4061 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4062 4063 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4064 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4065 4066 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4067 if (container) { 4068 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4069 } 4070 bi = merge->bi; 4071 bj = merge->bj; 4072 buf_ri = merge->buf_ri; 4073 buf_rj = merge->buf_rj; 4074 4075 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4076 owners = merge->rowmap->range; 4077 len_s = merge->len_s; 4078 4079 /* send and recv matrix values */ 4080 /*-----------------------------*/ 4081 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4082 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4083 4084 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4085 for (proc=0,k=0; proc<size; proc++){ 4086 if (!len_s[proc]) continue; 4087 i = owners[proc]; 4088 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4089 k++; 4090 } 4091 4092 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4093 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4094 ierr = PetscFree(status);CHKERRQ(ierr); 4095 4096 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4097 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4098 4099 /* insert mat values of mpimat */ 4100 /*----------------------------*/ 4101 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4102 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4103 nextrow = buf_ri_k + merge->nrecv; 4104 nextai = nextrow + merge->nrecv; 4105 4106 for (k=0; k<merge->nrecv; k++){ 4107 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4108 nrows = *(buf_ri_k[k]); 4109 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4110 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4111 } 4112 4113 /* set values of ba */ 4114 m = merge->rowmap->n; 4115 for (i=0; i<m; i++) { 4116 arow = owners[rank] + i; 4117 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4118 bnzi = bi[i+1] - bi[i]; 4119 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4120 4121 /* add local non-zero vals of this proc's seqmat into ba */ 4122 anzi = ai[arow+1] - ai[arow]; 4123 aj = a->j + ai[arow]; 4124 aa = a->a + ai[arow]; 4125 nextaj = 0; 4126 for (j=0; nextaj<anzi; j++){ 4127 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4128 ba_i[j] += aa[nextaj++]; 4129 } 4130 } 4131 4132 /* 
add received vals into ba */ 4133 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4134 /* i-th row */ 4135 if (i == *nextrow[k]) { 4136 anzi = *(nextai[k]+1) - *nextai[k]; 4137 aj = buf_rj[k] + *(nextai[k]); 4138 aa = abuf_r[k] + *(nextai[k]); 4139 nextaj = 0; 4140 for (j=0; nextaj<anzi; j++){ 4141 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4142 ba_i[j] += aa[nextaj++]; 4143 } 4144 } 4145 nextrow[k]++; nextai[k]++; 4146 } 4147 } 4148 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4149 } 4150 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4151 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4152 4153 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4154 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4155 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4156 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4157 PetscFunctionReturn(0); 4158 } 4159 4160 #undef __FUNCT__ 4161 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4162 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4163 { 4164 PetscErrorCode ierr; 4165 Mat B_mpi; 4166 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4167 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4168 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4169 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4170 PetscInt len,proc,*dnz,*onz; 4171 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4172 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4173 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4174 MPI_Status *status; 4175 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4176 PetscBT lnkbt; 4177 Mat_Merge_SeqsToMPI *merge; 4178 PetscContainer container; 4179 4180 PetscFunctionBegin; 4181 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4182 4183 /* make sure it is a PETSc comm */ 4184 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4185 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4186 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4187 4188 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4189 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4190 4191 /* determine row ownership */ 4192 /*---------------------------------------------------------*/ 4193 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4194 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4195 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4196 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4197 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4198 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4199 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4200 4201 m = merge->rowmap->n; 4202 M = merge->rowmap->N; 4203 owners = merge->rowmap->range; 4204 4205 /* determine the number of messages to send, their lengths */ 4206 /*---------------------------------------------------------*/ 4207 len_s = merge->len_s; 4208 4209 len = 0; /* length of buf_si[] */ 4210 merge->nsend = 0; 4211 for (proc=0; proc<size; proc++){ 4212 len_si[proc] = 0; 4213 if (proc == rank){ 4214 len_s[proc] = 0; 4215 } else { 4216 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4217 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4218 } 4219 if (len_s[proc]) { 4220 merge->nsend++; 4221 nrows = 0; 4222 for 
(i=owners[proc]; i<owners[proc+1]; i++){ 4223 if (ai[i+1] > ai[i]) nrows++; 4224 } 4225 len_si[proc] = 2*(nrows+1); 4226 len += len_si[proc]; 4227 } 4228 } 4229 4230 /* determine the number and length of messages to receive for ij-structure */ 4231 /*-------------------------------------------------------------------------*/ 4232 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4233 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4234 4235 /* post the Irecv of j-structure */ 4236 /*-------------------------------*/ 4237 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4238 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4239 4240 /* post the Isend of j-structure */ 4241 /*--------------------------------*/ 4242 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4243 sj_waits = si_waits + merge->nsend; 4244 4245 for (proc=0, k=0; proc<size; proc++){ 4246 if (!len_s[proc]) continue; 4247 i = owners[proc]; 4248 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4249 k++; 4250 } 4251 4252 /* receives and sends of j-structure are complete */ 4253 /*------------------------------------------------*/ 4254 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4255 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4256 4257 /* send and recv i-structure */ 4258 /*---------------------------*/ 4259 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4260 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4261 4262 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4263 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4264 for (proc=0,k=0; proc<size; proc++){ 4265 if (!len_s[proc]) continue; 4266 /* form outgoing message for i-structure: 4267 buf_si[0]: nrows to be sent 4268 [1:nrows]: row index (global) 4269 [nrows+1:2*nrows+1]: i-structure index 4270 */ 4271 /*-------------------------------------------*/ 4272 nrows = len_si[proc]/2 - 1; 4273 buf_si_i = buf_si + nrows+1; 4274 buf_si[0] = nrows; 4275 buf_si_i[0] = 0; 4276 nrows = 0; 4277 for (i=owners[proc]; i<owners[proc+1]; i++){ 4278 anzi = ai[i+1] - ai[i]; 4279 if (anzi) { 4280 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4281 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4282 nrows++; 4283 } 4284 } 4285 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4286 k++; 4287 buf_si += len_si[proc]; 4288 } 4289 4290 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4291 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4292 4293 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4294 for (i=0; i<merge->nrecv; i++){ 4295 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4296 } 4297 4298 ierr = PetscFree(len_si);CHKERRQ(ierr); 4299 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4300 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4301 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4302 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4303 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4304 ierr = PetscFree(status);CHKERRQ(ierr); 4305 4306 /* compute a 
local seq matrix in each processor */ 4307 /*----------------------------------------------*/ 4308 /* allocate bi array and free space for accumulating nonzero column info */ 4309 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4310 bi[0] = 0; 4311 4312 /* create and initialize a linked list */ 4313 nlnk = N+1; 4314 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4315 4316 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4317 len = 0; 4318 len = ai[owners[rank+1]] - ai[owners[rank]]; 4319 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4320 current_space = free_space; 4321 4322 /* determine symbolic info for each local row */ 4323 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4324 nextrow = buf_ri_k + merge->nrecv; 4325 nextai = nextrow + merge->nrecv; 4326 for (k=0; k<merge->nrecv; k++){ 4327 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4328 nrows = *buf_ri_k[k]; 4329 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4330 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4331 } 4332 4333 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4334 len = 0; 4335 for (i=0;i<m;i++) { 4336 bnzi = 0; 4337 /* add local non-zero cols of this proc's seqmat into lnk */ 4338 arow = owners[rank] + i; 4339 anzi = ai[arow+1] - ai[arow]; 4340 aj = a->j + ai[arow]; 4341 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4342 bnzi += nlnk; 4343 /* add received col data into lnk */ 4344 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4345 if (i == *nextrow[k]) { /* i-th row */ 4346 anzi = *(nextai[k]+1) - *nextai[k]; 4347 aj = buf_rj[k] + *nextai[k]; 4348 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4349 bnzi += nlnk; 4350 nextrow[k]++; nextai[k]++; 4351 } 4352 } 4353 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4354 4355 /* if free space is not available, make more free space */ 4356 if (current_space->local_remaining<bnzi) { 4357 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4358 nspacedouble++; 4359 } 4360 /* copy data into free space, then initialize lnk */ 4361 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4362 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4363 4364 current_space->array += bnzi; 4365 current_space->local_used += bnzi; 4366 current_space->local_remaining -= bnzi; 4367 4368 bi[i+1] = bi[i] + bnzi; 4369 } 4370 4371 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4372 4373 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4374 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4375 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4376 4377 /* create symbolic parallel matrix B_mpi */ 4378 /*---------------------------------------*/ 4379 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4380 if (n==PETSC_DECIDE) { 4381 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4382 } else { 4383 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4384 } 4385 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4386 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4387 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4388 4389 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4390 B_mpi->assembled = PETSC_FALSE; 4391 B_mpi->ops->destroy = 
MatDestroy_MPIAIJ_SeqsToMPI; 4392 merge->bi = bi; 4393 merge->bj = bj; 4394 merge->buf_ri = buf_ri; 4395 merge->buf_rj = buf_rj; 4396 merge->coi = PETSC_NULL; 4397 merge->coj = PETSC_NULL; 4398 merge->owners_co = PETSC_NULL; 4399 4400 /* attach the supporting struct to B_mpi for reuse */ 4401 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4402 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4403 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4404 *mpimat = B_mpi; 4405 4406 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4407 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4408 PetscFunctionReturn(0); 4409 } 4410 4411 #undef __FUNCT__ 4412 #define __FUNCT__ "MatMerge_SeqsToMPI" 4413 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4414 { 4415 PetscErrorCode ierr; 4416 4417 PetscFunctionBegin; 4418 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4419 if (scall == MAT_INITIAL_MATRIX){ 4420 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4421 } 4422 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4423 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4424 PetscFunctionReturn(0); 4425 } 4426 4427 #undef __FUNCT__ 4428 #define __FUNCT__ "MatGetLocalMat" 4429 /*@ 4430 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4431 4432 Not Collective 4433 4434 Input Parameters: 4435 + A - the matrix 4436 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4437 4438 Output Parameter: 4439 . A_loc - the local sequential matrix generated 4440 4441 Level: developer 4442 4443 @*/ 4444 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4445 { 4446 PetscErrorCode ierr; 4447 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4448 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4449 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4450 MatScalar *aa=a->a,*ba=b->a,*cam; 4451 PetscScalar *ca; 4452 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4453 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4454 4455 PetscFunctionBegin; 4456 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4457 if (scall == MAT_INITIAL_MATRIX){ 4458 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4459 ci[0] = 0; 4460 for (i=0; i<am; i++){ 4461 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4462 } 4463 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4464 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4465 k = 0; 4466 for (i=0; i<am; i++) { 4467 ncols_o = bi[i+1] - bi[i]; 4468 ncols_d = ai[i+1] - ai[i]; 4469 /* off-diagonal portion of A */ 4470 for (jo=0; jo<ncols_o; jo++) { 4471 col = cmap[*bj]; 4472 if (col >= cstart) break; 4473 cj[k] = col; bj++; 4474 ca[k++] = *ba++; 4475 } 4476 /* diagonal portion of A */ 4477 for (j=0; j<ncols_d; j++) { 4478 cj[k] = cstart + *aj++; 4479 ca[k++] = *aa++; 4480 } 4481 /* off-diagonal portion of A */ 4482 for (j=jo; j<ncols_o; j++) { 4483 cj[k] = cmap[*bj++]; 4484 ca[k++] = *ba++; 4485 } 4486 } 4487 /* put together the new matrix */ 4488 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4489 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 4490 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4491 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4492 mat->free_a = PETSC_TRUE; 4493 mat->free_ij = PETSC_TRUE; 4494 mat->nonew = 0; 4495 } else if (scall == MAT_REUSE_MATRIX){ 4496 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4497 ci = mat->i; cj = mat->j; cam = mat->a; 4498 for (i=0; i<am; i++) { 4499 /* off-diagonal portion of A */ 4500 ncols_o = bi[i+1] - bi[i]; 4501 for (jo=0; jo<ncols_o; jo++) { 4502 col = cmap[*bj]; 4503 if (col >= cstart) break; 4504 *cam++ = *ba++; bj++; 4505 } 4506 /* diagonal portion of A */ 4507 ncols_d = ai[i+1] - ai[i]; 4508 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4509 /* off-diagonal portion of A */ 4510 for (j=jo; j<ncols_o; j++) { 4511 *cam++ = *ba++; bj++; 4512 } 4513 } 4514 } else { 4515 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4516 } 4517 4518 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4519 PetscFunctionReturn(0); 4520 } 4521 4522 #undef __FUNCT__ 4523 #define __FUNCT__ "MatGetLocalMatCondensed" 4524 /*@C 4525 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4526 4527 Not Collective 4528 4529 Input Parameters: 4530 + A - the matrix 4531 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4532 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4533 4534 Output Parameter: 4535 . A_loc - the local sequential matrix generated 4536 4537 Level: developer 4538 4539 @*/ 4540 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4541 { 4542 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4543 PetscErrorCode ierr; 4544 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4545 IS isrowa,iscola; 4546 Mat *aloc; 4547 4548 PetscFunctionBegin; 4549 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4550 if (!row){ 4551 start = A->rmap->rstart; end = A->rmap->rend; 4552 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4553 } else { 4554 isrowa = *row; 4555 } 4556 if (!col){ 4557 start = A->cmap->rstart; 4558 cmap = a->garray; 4559 nzA = a->A->cmap->n; 4560 nzB = a->B->cmap->n; 4561 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4562 ncols = 0; 4563 for (i=0; i<nzB; i++) { 4564 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4565 else break; 4566 } 4567 imark = i; 4568 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4569 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4570 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4571 ierr = PetscFree(idx);CHKERRQ(ierr); 4572 } else { 4573 iscola = *col; 4574 } 4575 if (scall != MAT_INITIAL_MATRIX){ 4576 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4577 aloc[0] = *A_loc; 4578 } 4579 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4580 *A_loc = aloc[0]; 4581 ierr = PetscFree(aloc);CHKERRQ(ierr); 4582 if (!row){ 4583 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4584 } 4585 if (!col){ 4586 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4587 } 4588 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4589 PetscFunctionReturn(0); 4590 } 4591 4592 #undef __FUNCT__ 4593 #define __FUNCT__ "MatGetBrowsOfAcols" 4594 /*@C 4595 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4596 4597 Collective on Mat 4598 4599 Input Parameters: 4600 + A,B - the matrices in mpiaij format 4601 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4602 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4603
4604 Output Parameters:
4605 + rowb, colb - index sets of rows and columns of B to extract
4606 . brstart - row index in B_seq at which the B->rmap->n rows taken from B's local rows begin
4607 - B_seq - the sequential matrix generated
4608
4609 Level: developer
4610
4611 @*/
4612 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4613 {
4614 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4615 PetscErrorCode ierr;
4616 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4617 IS isrowb,iscolb;
4618 Mat *bseq;
4619
4620 PetscFunctionBegin;
4621 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4622 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4623 }
4624 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4625
4626 if (scall == MAT_INITIAL_MATRIX){
4627 start = A->cmap->rstart;
4628 cmap = a->garray;
4629 nzA = a->A->cmap->n;
4630 nzB = a->B->cmap->n;
4631 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4632 ncols = 0;
4633 for (i=0; i<nzB; i++) { /* row < local row index */
4634 if (cmap[i] < start) idx[ncols++] = cmap[i];
4635 else break;
4636 }
4637 imark = i;
4638 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
4639 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4640 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4641 ierr = PetscFree(idx);CHKERRQ(ierr);
4642 *brstart = imark;
4643 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4644 } else {
4645 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4646 isrowb = *rowb; iscolb = *colb;
4647 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4648 bseq[0] = *B_seq;
4649 }
4650 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4651 *B_seq = bseq[0];
4652 ierr = PetscFree(bseq);CHKERRQ(ierr);
4653 if (!rowb){
4654 ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4655 } else {
4656 *rowb = isrowb;
4657 }
4658 if (!colb){
4659 ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4660 } else {
4661 *colb = iscolb;
4662 }
4663 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4664 PetscFunctionReturn(0);
4665 }
4666
4667 #undef __FUNCT__
4668 #define __FUNCT__ "MatGetBrowsOfAoCols"
4669 /*@C
4670 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4671 of the OFF-DIAGONAL portion of the local part of A
4672
4673 Collective on Mat
4674
4675 Input Parameters:
4676 + A,B - the matrices in mpiaij format
4677 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4678 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4679 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4680 4681 Output Parameter: 4682 + B_oth - the sequential matrix generated 4683 4684 Level: developer 4685 4686 @*/ 4687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4688 { 4689 VecScatter_MPI_General *gen_to,*gen_from; 4690 PetscErrorCode ierr; 4691 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4692 Mat_SeqAIJ *b_oth; 4693 VecScatter ctx=a->Mvctx; 4694 MPI_Comm comm=((PetscObject)ctx)->comm; 4695 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4696 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4697 PetscScalar *rvalues,*svalues; 4698 MatScalar *b_otha,*bufa,*bufA; 4699 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4700 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4701 MPI_Status *sstatus,rstatus; 4702 PetscMPIInt jj; 4703 PetscInt *cols,sbs,rbs; 4704 PetscScalar *vals; 4705 4706 PetscFunctionBegin; 4707 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4708 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4709 } 4710 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4711 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4712 4713 gen_to = (VecScatter_MPI_General*)ctx->todata; 4714 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4715 rvalues = gen_from->values; /* holds the length of receiving row */ 4716 svalues = gen_to->values; /* holds the length of sending row */ 4717 nrecvs = gen_from->n; 4718 nsends = gen_to->n; 4719 4720 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4721 srow = gen_to->indices; /* local row index to be sent */ 4722 sstarts = gen_to->starts; 4723 sprocs = gen_to->procs; 4724 sstatus = gen_to->sstatus; 4725 sbs = gen_to->bs; 4726 rstarts = gen_from->starts; 4727 rprocs = gen_from->procs; 4728 rbs = gen_from->bs; 4729 4730 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4731 if (scall == MAT_INITIAL_MATRIX){ 4732 /* i-array */ 4733 /*---------*/ 4734 /* post receives */ 4735 for (i=0; i<nrecvs; i++){ 4736 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4737 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4738 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4739 } 4740 4741 /* pack the outgoing message */ 4742 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4743 rstartsj = sstartsj + nsends +1; 4744 sstartsj[0] = 0; rstartsj[0] = 0; 4745 len = 0; /* total length of j or a array to be sent */ 4746 k = 0; 4747 for (i=0; i<nsends; i++){ 4748 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4749 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4750 for (j=0; j<nrows; j++) { 4751 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4752 for (l=0; l<sbs; l++){ 4753 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4754 rowlen[j*sbs+l] = ncols; 4755 len += ncols; 4756 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4757 } 4758 k++; 4759 } 4760 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4761 
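/* len now holds the total number of nonzeros packed for processes 0..i; the prefix sums stored in sstartsj[] are used below to slice bufj and bufa into per-process send segments */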
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4762 }
4763 /* recvs and sends of i-array are completed */
4764 i = nrecvs;
4765 while (i--) {
4766 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4767 }
4768 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4769
4770 /* allocate buffers for sending j and a arrays */
4771 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4772 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4773
4774 /* create i-array of B_oth */
4775 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4776 b_othi[0] = 0;
4777 len = 0; /* total length of j or a array to be received */
4778 k = 0;
4779 for (i=0; i<nrecvs; i++){
4780 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4781 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4782 for (j=0; j<nrows; j++) {
4783 b_othi[k+1] = b_othi[k] + rowlen[j];
4784 len += rowlen[j]; k++;
4785 }
4786 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4787 }
4788
4789 /* allocate space for j and a arrays of B_oth */
4790 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4791 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4792
4793 /* j-array */
4794 /*---------*/
4795 /* post receives of j-array */
4796 for (i=0; i<nrecvs; i++){
4797 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4798 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4799 }
4800
4801 /* pack the outgoing message j-array */
4802 k = 0;
4803 for (i=0; i<nsends; i++){
4804 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4805 bufJ = bufj+sstartsj[i];
4806 for (j=0; j<nrows; j++) {
4807 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
4808 for (ll=0; ll<sbs; ll++){
4809 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4810 for (l=0; l<ncols; l++){
4811 *bufJ++ = cols[l];
4812 }
4813 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4814 }
4815 }
4816 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4817 }
4818
4819 /* recvs and sends of j-array are completed */
4820 i = nrecvs;
4821 while (i--) {
4822 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4823 }
4824 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4825 } else if (scall == MAT_REUSE_MATRIX){
4826 sstartsj = *startsj;
4827 rstartsj = sstartsj + nsends +1;
4828 bufa = *bufa_ptr;
4829 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
4830 b_otha = b_oth->a;
4831 } else {
4832 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4833 }
4834
4835 /* a-array */
4836 /*---------*/
4837 /* post receives of a-array */
4838 for (i=0; i<nrecvs; i++){
4839 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4840 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4841 }
4842
4843 /* pack the outgoing message a-array */
4844 k = 0;
4845 for (i=0; i<nsends; i++){
4846 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4847 bufA = bufa+sstartsj[i];
4848 for (j=0; j<nrows; j++) {
4849 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
4850 for (ll=0; ll<sbs; ll++){
4851 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4852 for (l=0; l<ncols; l++){
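/* append this row's numerical values to the outgoing a-array buffer */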
4853 *bufA++ = vals[l]; 4854 } 4855 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4856 } 4857 } 4858 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4859 } 4860 /* recvs and sends of a-array are completed */ 4861 i = nrecvs; 4862 while (i--) { 4863 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4864 } 4865 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4866 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4867 4868 if (scall == MAT_INITIAL_MATRIX){ 4869 /* put together the new matrix */ 4870 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4871 4872 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4873 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4874 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4875 b_oth->free_a = PETSC_TRUE; 4876 b_oth->free_ij = PETSC_TRUE; 4877 b_oth->nonew = 0; 4878 4879 ierr = PetscFree(bufj);CHKERRQ(ierr); 4880 if (!startsj || !bufa_ptr){ 4881 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4882 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4883 } else { 4884 *startsj = sstartsj; 4885 *bufa_ptr = bufa; 4886 } 4887 } 4888 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4889 PetscFunctionReturn(0); 4890 } 4891 4892 #undef __FUNCT__ 4893 #define __FUNCT__ "MatGetCommunicationStructs" 4894 /*@C 4895 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4896 4897 Not Collective 4898 4899 Input Parameters: 4900 . A - The matrix in mpiaij format 4901 4902 Output Parameter: 4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4904 . 
colmap - A map from global column index to local index into lvec 4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4906 4907 Level: developer 4908 4909 @*/ 4910 #if defined (PETSC_USE_CTABLE) 4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4912 #else 4913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4914 #endif 4915 { 4916 Mat_MPIAIJ *a; 4917 4918 PetscFunctionBegin; 4919 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4920 PetscValidPointer(lvec, 2) 4921 PetscValidPointer(colmap, 3) 4922 PetscValidPointer(multScatter, 4) 4923 a = (Mat_MPIAIJ *) A->data; 4924 if (lvec) *lvec = a->lvec; 4925 if (colmap) *colmap = a->colmap; 4926 if (multScatter) *multScatter = a->Mvctx; 4927 PetscFunctionReturn(0); 4928 } 4929 4930 EXTERN_C_BEGIN 4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4932 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4933 EXTERN_C_END 4934 4935 #undef __FUNCT__ 4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4937 /* 4938 Computes (B'*A')' since computing B*A directly is untenable 4939 4940 n p p 4941 ( ) ( ) ( ) 4942 m ( A ) * n ( B ) = m ( C ) 4943 ( ) ( ) ( ) 4944 4945 */ 4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4947 { 4948 PetscErrorCode ierr; 4949 Mat At,Bt,Ct; 4950 4951 PetscFunctionBegin; 4952 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4953 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4954 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4955 ierr = MatDestroy(At);CHKERRQ(ierr); 4956 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4957 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4958 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4965 { 4966 PetscErrorCode ierr; 4967 PetscInt m=A->rmap->n,n=B->cmap->n; 4968 Mat Cmat; 4969 4970 PetscFunctionBegin; 4971 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4972 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4973 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4974 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4975 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4976 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4977 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4978 *C = Cmat; 4979 PetscFunctionReturn(0); 4980 } 4981 4982 /* ----------------------------------------------------------------*/ 4983 #undef __FUNCT__ 4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4986 { 4987 PetscErrorCode ierr; 4988 4989 PetscFunctionBegin; 4990 if (scall == MAT_INITIAL_MATRIX){ 4991 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4992 } 4993 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4994 PetscFunctionReturn(0); 4995 } 4996 4997 EXTERN_C_BEGIN 4998 #if defined(PETSC_HAVE_MUMPS) 4999 extern PetscErrorCode 
MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5000 #endif 5001 #if defined(PETSC_HAVE_PASTIX) 5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5003 #endif 5004 #if defined(PETSC_HAVE_SUPERLU_DIST) 5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5006 #endif 5007 #if defined(PETSC_HAVE_SPOOLES) 5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5009 #endif 5010 EXTERN_C_END 5011 5012 /*MC 5013 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5014 5015 Options Database Keys: 5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5017 5018 Level: beginner 5019 5020 .seealso: MatCreateMPIAIJ() 5021 M*/ 5022 5023 EXTERN_C_BEGIN 5024 #undef __FUNCT__ 5025 #define __FUNCT__ "MatCreate_MPIAIJ" 5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5027 { 5028 Mat_MPIAIJ *b; 5029 PetscErrorCode ierr; 5030 PetscMPIInt size; 5031 5032 PetscFunctionBegin; 5033 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5034 5035 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5036 B->data = (void*)b; 5037 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5038 B->rmap->bs = 1; 5039 B->assembled = PETSC_FALSE; 5040 B->mapping = 0; 5041 5042 B->insertmode = NOT_SET_VALUES; 5043 b->size = size; 5044 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5045 5046 /* build cache for off array entries formed */ 5047 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5048 b->donotstash = PETSC_FALSE; 5049 b->colmap = 0; 5050 b->garray = 0; 5051 b->roworiented = PETSC_TRUE; 5052 5053 /* stuff used for matrix vector multiply */ 5054 b->lvec = PETSC_NULL; 5055 b->Mvctx = PETSC_NULL; 5056 5057 /* stuff for MatGetRow() */ 5058 b->rowindices = 0; 5059 b->rowvalues = 0; 5060 b->getrowactive = PETSC_FALSE; 5061 5062 #if defined(PETSC_HAVE_SPOOLES) 5063 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5064 "MatGetFactor_mpiaij_spooles", 5065 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5066 #endif 5067 #if defined(PETSC_HAVE_MUMPS) 5068 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5069 "MatGetFactor_mpiaij_mumps", 5070 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5071 #endif 5072 #if defined(PETSC_HAVE_PASTIX) 5073 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5074 "MatGetFactor_mpiaij_pastix", 5075 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5076 #endif 5077 #if defined(PETSC_HAVE_SUPERLU_DIST) 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5079 "MatGetFactor_mpiaij_superlu_dist", 5080 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5081 #endif 5082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5083 "MatStoreValues_MPIAIJ", 5084 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5086 "MatRetrieveValues_MPIAIJ", 5087 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5089 "MatGetDiagonalBlock_MPIAIJ", 5090 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5092 "MatIsTranspose_MPIAIJ", 5093 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5094 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5095 "MatMPIAIJSetPreallocation_MPIAIJ", 5096 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5098 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5099 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5101 "MatDiagonalScaleLocal_MPIAIJ", 5102 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5104 "MatConvert_MPIAIJ_MPICSRPERM", 5105 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5107 "MatConvert_MPIAIJ_MPICRL", 5108 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5110 "MatConvert_MPIAIJ_MPISBAIJ", 5111 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5113 "MatMatMult_MPIDense_MPIAIJ", 5114 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5116 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5117 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5119 "MatMatMultNumeric_MPIDense_MPIAIJ", 5120 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5121 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5122 PetscFunctionReturn(0); 5123 } 5124 EXTERN_C_END 5125 5126 #undef __FUNCT__ 5127 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5128 /*@ 5129 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5130 and "off-diagonal" part of the matrix in CSR format. 5131 5132 Collective on MPI_Comm 5133 5134 Input Parameters: 5135 + comm - MPI communicator 5136 . m - number of local rows (Cannot be PETSC_DECIDE) 5137 . n - This value should be the same as the local size used in creating the 5138 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5139 calculated if N is given) For square matrices n is almost always m. 5140 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5141 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5142 . i - row indices for "diagonal" portion of matrix 5143 . j - column indices 5144 . a - matrix values 5145 . oi - row indices for "off-diagonal" portion of matrix 5146 . oj - column indices 5147 - oa - matrix values 5148 5149 Output Parameter: 5150 . mat - the matrix 5151 5152 Level: advanced 5153 5154 Notes: 5155 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5156 5157 The i and j indices are 0 based 5158 5159 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5160 5161 This sets local rows and cannot be used to set off-processor values. 5162 5163 You cannot later use MatSetValues() to change values in this matrix. 
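   Example usage, a sketch only (the CSR arrays di,dj,da for the diagonal block and oi,oj,oa for the
   off-diagonal block, as well as m and n, are hypothetical names assumed to have been built by the
   caller for its own rows, not PETSc symbols):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
      /* use A ...; the arrays are not copied, so they must remain valid while A exists */
      ierr = MatDestroy(A);CHKERRQ(ierr);
.ve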
5164 5165 .keywords: matrix, aij, compressed row, sparse, parallel 5166 5167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5168 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5169 @*/ 5170 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5171 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5172 { 5173 PetscErrorCode ierr; 5174 Mat_MPIAIJ *maij; 5175 5176 PetscFunctionBegin; 5177 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5178 if (i[0]) { 5179 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5180 } 5181 if (oi[0]) { 5182 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5183 } 5184 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5185 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5186 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5187 maij = (Mat_MPIAIJ*) (*mat)->data; 5188 maij->donotstash = PETSC_TRUE; 5189 (*mat)->preallocated = PETSC_TRUE; 5190 5191 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5192 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5193 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5194 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5195 5196 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5197 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5198 5199 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5200 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5201 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5202 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5203 5204 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5206 PetscFunctionReturn(0); 5207 } 5208 5209 /* 5210 Special version for direct calls from Fortran 5211 */ 5212 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5213 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5214 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5215 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5216 #endif 5217 5218 /* Change these macros so can be used in void function */ 5219 #undef CHKERRQ 5220 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5221 #undef SETERRQ2 5222 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5223 #undef SETERRQ 5224 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5225 5226 EXTERN_C_BEGIN 5227 #undef __FUNCT__ 5228 #define __FUNCT__ "matsetvaluesmpiaij_" 5229 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5230 { 5231 Mat mat = *mmat; 5232 PetscInt m = *mm, n = *mn; 5233 InsertMode addv = *maddv; 5234 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5235 PetscScalar value; 5236 PetscErrorCode ierr; 5237 5238 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5239 if (mat->insertmode == NOT_SET_VALUES) { 5240 mat->insertmode = addv; 5241 } 5242 #if defined(PETSC_USE_DEBUG) 5243 else if (mat->insertmode != addv) { 5244 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5245 } 5246 #endif 5247 { 5248 PetscInt i,j,rstart = mat->rmap->rstart,rend = 
mat->rmap->rend; 5249 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5250 PetscTruth roworiented = aij->roworiented; 5251 5252 /* Some Variables required in the macro */ 5253 Mat A = aij->A; 5254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5255 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5256 MatScalar *aa = a->a; 5257 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5258 Mat B = aij->B; 5259 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5260 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5261 MatScalar *ba = b->a; 5262 5263 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5264 PetscInt nonew = a->nonew; 5265 MatScalar *ap1,*ap2; 5266 5267 PetscFunctionBegin; 5268 for (i=0; i<m; i++) { 5269 if (im[i] < 0) continue; 5270 #if defined(PETSC_USE_DEBUG) 5271 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5272 #endif 5273 if (im[i] >= rstart && im[i] < rend) { 5274 row = im[i] - rstart; 5275 lastcol1 = -1; 5276 rp1 = aj + ai[row]; 5277 ap1 = aa + ai[row]; 5278 rmax1 = aimax[row]; 5279 nrow1 = ailen[row]; 5280 low1 = 0; 5281 high1 = nrow1; 5282 lastcol2 = -1; 5283 rp2 = bj + bi[row]; 5284 ap2 = ba + bi[row]; 5285 rmax2 = bimax[row]; 5286 nrow2 = bilen[row]; 5287 low2 = 0; 5288 high2 = nrow2; 5289 5290 for (j=0; j<n; j++) { 5291 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5292 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5293 if (in[j] >= cstart && in[j] < cend){ 5294 col = in[j] - cstart; 5295 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5296 } else if (in[j] < 0) continue; 5297 #if defined(PETSC_USE_DEBUG) 5298 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5299 #endif 5300 else { 5301 if (mat->was_assembled) { 5302 if (!aij->colmap) { 5303 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5304 } 5305 #if defined (PETSC_USE_CTABLE) 5306 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5307 col--; 5308 #else 5309 col = aij->colmap[in[j]] - 1; 5310 #endif 5311 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5312 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5313 col = in[j]; 5314 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5315 B = aij->B; 5316 b = (Mat_SeqAIJ*)B->data; 5317 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5318 rp2 = bj + bi[row]; 5319 ap2 = ba + bi[row]; 5320 rmax2 = bimax[row]; 5321 nrow2 = bilen[row]; 5322 low2 = 0; 5323 high2 = nrow2; 5324 bm = aij->B->rmap->n; 5325 ba = b->a; 5326 } 5327 } else col = in[j]; 5328 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5329 } 5330 } 5331 } else { 5332 if (!aij->donotstash) { 5333 if (roworiented) { 5334 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5335 } else { 5336 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5337 } 5338 } 5339 } 5340 }} 5341 PetscFunctionReturnVoid(); 5342 } 5343 EXTERN_C_END 5344 5345