#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

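/*
   Illustrative usage sketch (not part of the original routine): distribute a
   square SeqAIJ matrix, fully assembled on rank 0, so that each process owns
   m consecutive rows of the resulting MPIAIJ matrix. The names gA and dA and
   the choice of the local row count m are assumptions made for the example.

     Mat            gA;     (SeqAIJ matrix, significant only on rank 0)
     Mat            dA;     (distributed result)
     PetscInt       m;      (number of rows this process is to own)
     PetscErrorCode ierr;

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);

   When only the numerical values of gA have changed, the layout and nonzero
   structure already stored in dA can be reused:

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_REUSE_MATRIX,&dA);CHKERRQ(ierr);
*/
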
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each processor
  has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

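/*
   Minimal sketch of how the colmap created above is queried; this mirrors the
   lookup performed later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().
   Here gcol is a hypothetical global column index and col becomes the local
   column number in the off-diagonal block, or negative if that global column
   is not present in the local off-diagonal part:

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/
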

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__
"MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 if (v) PetscValidScalarPointer(v,6); 331 for (i=0; i<m; i++) { 332 if (im[i] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 335 #endif 336 if (im[i] >= rstart && im[i] < rend) { 337 row = im[i] - rstart; 338 lastcol1 = -1; 339 rp1 = aj + ai[row]; 340 ap1 = aa + ai[row]; 341 rmax1 = aimax[row]; 342 nrow1 = ailen[row]; 343 low1 = 0; 344 high1 = nrow1; 345 lastcol2 = -1; 346 rp2 = bj + bi[row]; 347 ap2 = ba + bi[row]; 348 rmax2 = bimax[row]; 349 nrow2 = bilen[row]; 350 low2 = 0; 351 high2 = nrow2; 352 353 for (j=0; j<n; j++) { 354 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 355 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 356 if (in[j] >= cstart && in[j] < cend){ 357 col = in[j] - cstart; 358 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 359 } else if (in[j] < 0) continue; 360 #if defined(PETSC_USE_DEBUG) 361 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 362 #endif 363 else { 364 if (mat->was_assembled) { 365 if (!aij->colmap) { 366 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 367 } 368 #if defined (PETSC_USE_CTABLE) 369 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 370 col--; 371 #else 372 col = aij->colmap[in[j]] - 1; 373 #endif 374 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 375 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 376 col = in[j]; 377 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 378 B = aij->B; 379 b = (Mat_SeqAIJ*)B->data; 380 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 381 rp2 = bj + bi[row]; 382 ap2 = ba + bi[row]; 383 rmax2 = bimax[row]; 384 nrow2 = bilen[row]; 385 low2 = 0; 386 high2 = nrow2; 387 bm = aij->B->rmap->n; 388 ba = b->a; 389 } 390 } else col = in[j]; 391 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 392 } 393 } 394 } else { 395 if (!aij->donotstash) { 396 if (roworiented) { 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } else { 399 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 400 } 401 } 402 } 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatGetValues_MPIAIJ" 409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 414 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 415 416 PetscFunctionBegin; 417 for (i=0; i<m; i++) { 418 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 419 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 420 if (idxm[i] >= rstart && idxm[i] < rend) { 421 row = idxm[i] - rstart; 422 for (j=0; j<n; j++) { 423 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 424 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 425 if (idxn[j] >= cstart && idxn[j] < cend){ 426 col = idxn[j] - cstart; 427 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 428 } else { 429 if (!aij->colmap) { 430 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 431 } 432 #if defined (PETSC_USE_CTABLE) 433 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 434 col --; 435 #else 436 col = aij->colmap[idxn[j]] - 1; 437 #endif 438 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 439 else { 440 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 441 } 442 } 443 } 444 } else { 445 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 446 } 447 } 448 PetscFunctionReturn(0); 449 } 450 451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 456 { 457 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 458 PetscErrorCode ierr; 459 PetscInt nstash,reallocs; 460 InsertMode addv; 461 462 PetscFunctionBegin; 463 if (aij->donotstash) { 464 PetscFunctionReturn(0); 465 } 466 467 /* make sure all processors are either in INSERTMODE or ADDMODE */ 468 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 469 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 mat->insertmode = addv; /* in case this processor had no cache */ 471 472 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 473 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 474 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 483 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 484 PetscErrorCode ierr; 485 PetscMPIInt n; 486 PetscInt i,j,rstart,ncols,flg; 487 PetscInt *row,*col; 488 PetscTruth other_disassembled; 489 PetscScalar *val; 490 InsertMode addv = mat->insertmode; 491 492 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 493 PetscFunctionBegin; 494 if (!aij->donotstash) { 495 while (1) { 496 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 497 if (!flg) break; 498 499 for (i=0; i<n;) { 500 /* Now identify the consecutive vals belonging to the same row */ 501 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 502 if (j < n) ncols = j-i; 503 else ncols = n-i; 504 /* Now assemble all these values with a single function call */ 505 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 506 i = j; 507 } 508 } 509 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 510 } 511 a->compressedrow.use = PETSC_FALSE; 512 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 513 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 514 515 /* determine if any processor has disassembled, if so we must 516 also disassemble ourselfs, in order that we may reassemble. 
*/ 517 /* 518 if nonzero structure of submatrix B cannot change then we know that 519 no processor disassembled thus we can skip this stuff 520 */ 521 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 522 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 523 if (mat->was_assembled && !other_disassembled) { 524 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525 } 526 } 527 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 528 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 529 } 530 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 531 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 532 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 534 535 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 536 aij->rowvalues = 0; 537 538 /* used by MatAXPY() */ 539 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 540 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 541 542 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 543 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 550 { 551 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 556 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatZeroRows_MPIAIJ" 562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 563 { 564 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 567 PetscInt i,*owners = A->rmap->range; 568 PetscInt *nprocs,j,idx,nsends,row; 569 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 570 PetscInt *rvalues,count,base,slen,*source; 571 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 572 MPI_Comm comm = ((PetscObject)A)->comm; 573 MPI_Request *send_waits,*recv_waits; 574 MPI_Status recv_status,*send_status; 575 #if defined(PETSC_DEBUG) 576 PetscTruth found = PETSC_FALSE; 577 #endif 578 579 PetscFunctionBegin; 580 /* first count number of contributors to each processor */ 581 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 582 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 583 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 584 j = 0; 585 for (i=0; i<N; i++) { 586 if (lastidx > (idx = rows[i])) j = 0; 587 lastidx = idx; 588 for (; j<size; j++) { 589 if (idx >= owners[j] && idx < owners[j+1]) { 590 nprocs[2*j]++; 591 nprocs[2*j+1] = 1; 592 owner[i] = j; 593 #if defined(PETSC_DEBUG) 594 found = PETSC_TRUE; 595 #endif 596 break; 597 } 598 } 599 #if defined(PETSC_DEBUG) 600 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 601 found = PETSC_FALSE; 602 #endif 603 } 604 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 605 606 /* inform other processors of number of messages and max length*/ 607 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 608 609 /* post receives: */ 610 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 611 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 612 for (i=0; i<nrecvs; i++) { 613 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 614 } 615 616 /* do sends: 617 1) starts[i] gives the starting index in svalues for stuff going to 618 the ith processor 619 */ 620 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 621 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 622 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 623 starts[0] = 0; 624 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 625 for (i=0; i<N; i++) { 626 svalues[starts[owner[i]]++] = rows[i]; 627 } 628 629 starts[0] = 0; 630 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 631 count = 0; 632 for (i=0; i<size; i++) { 633 if (nprocs[2*i+1]) { 634 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscFree(starts);CHKERRQ(ierr); 638 639 base = owners[rank]; 640 641 /* wait on receives */ 642 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 643 count = nrecvs; slen = 0; 644 while (count) { 645 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 646 /* unpack receives into our local space */ 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 648 source[imdex] = recv_status.MPI_SOURCE; 649 lens[imdex] = n; 650 slen += n; 651 count--; 652 } 653 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 654 655 /* move the data into the send scatter */ 656 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 657 count = 0; 658 for (i=0; i<nrecvs; i++) { 659 values = rvalues + i*nmax; 660 for (j=0; j<lens[i]; j++) { 661 lrows[count++] = values[j] - base; 662 } 663 } 664 ierr = PetscFree(rvalues);CHKERRQ(ierr); 665 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 666 ierr = PetscFree(owner);CHKERRQ(ierr); 667 ierr = PetscFree(nprocs);CHKERRQ(ierr); 668 669 /* actually zap the local rows */ 670 /* 671 Zero the required rows. If the "diagonal block" of the matrix 672 is square and the user wishes to set the diagonal we use separate 673 code so that MatSetValues() is not called for each diagonal allocating 674 new memory, thus calling lots of mallocs and slowing things down. 
675 676 */ 677 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 678 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 679 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 680 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 681 } else if (diag != 0.0) { 682 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 683 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 684 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 686 } 687 for (i = 0; i < slen; i++) { 688 row = lrows[i] + rstart; 689 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 690 } 691 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 692 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 } else { 694 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 695 } 696 ierr = PetscFree(lrows);CHKERRQ(ierr); 697 698 /* wait on sends */ 699 if (nsends) { 700 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 701 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 702 ierr = PetscFree(send_status);CHKERRQ(ierr); 703 } 704 ierr = PetscFree(send_waits);CHKERRQ(ierr); 705 ierr = PetscFree(svalues);CHKERRQ(ierr); 706 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatMult_MPIAIJ" 712 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 PetscInt nt; 717 718 PetscFunctionBegin; 719 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 720 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 721 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 722 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 723 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 724 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 730 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 731 { 732 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatMultAdd_MPIAIJ" 742 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 743 { 744 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 750 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 751 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNCT__ 756 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 757 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 758 { 759 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 760 PetscErrorCode ierr; 761 PetscTruth merged; 762 763 PetscFunctionBegin; 764 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 765 /* do nondiagonal part */ 766 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 767 if (!merged) { 768 /* send it on 
its way */ 769 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 770 /* do local part */ 771 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 772 /* receive remote parts: note this assumes the values are not actually */ 773 /* added in yy until the next line, */ 774 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 775 } else { 776 /* do local part */ 777 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 778 /* send it on its way */ 779 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 780 /* values actually were received in the Begin() but we need to call this nop */ 781 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782 } 783 PetscFunctionReturn(0); 784 } 785 786 EXTERN_C_BEGIN 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 789 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 790 { 791 MPI_Comm comm; 792 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 793 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 794 IS Me,Notme; 795 PetscErrorCode ierr; 796 PetscInt M,N,first,last,*notme,i; 797 PetscMPIInt size; 798 799 PetscFunctionBegin; 800 801 /* Easy test: symmetric diagonal block */ 802 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 803 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 804 if (!*f) PetscFunctionReturn(0); 805 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 807 if (size == 1) PetscFunctionReturn(0); 808 809 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 810 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 811 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 812 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 813 for (i=0; i<first; i++) notme[i] = i; 814 for (i=last; i<M; i++) notme[i-last+first] = i; 815 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 816 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 817 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 818 Aoff = Aoffs[0]; 819 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 820 Boff = Boffs[0]; 821 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 822 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 823 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 824 ierr = ISDestroy(Me);CHKERRQ(ierr); 825 ierr = ISDestroy(Notme);CHKERRQ(ierr); 826 827 PetscFunctionReturn(0); 828 } 829 EXTERN_C_END 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 833 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 834 { 835 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 836 PetscErrorCode ierr; 837 838 PetscFunctionBegin; 839 /* do nondiagonal part */ 840 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 841 /* send it on its way */ 842 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 843 /* do local part */ 844 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 845 /* receive remote parts */ 846 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 /* 851 This only works correctly for square matrices where the subblock A->A is the 852 diagonal block 853 */ 854 #undef 
__FUNCT__ 855 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 856 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 857 { 858 PetscErrorCode ierr; 859 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 860 861 PetscFunctionBegin; 862 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 863 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 864 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatScale_MPIAIJ" 870 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 871 { 872 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 877 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatDestroy_MPIAIJ" 883 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 884 { 885 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 886 PetscErrorCode ierr; 887 888 PetscFunctionBegin; 889 #if defined(PETSC_USE_LOG) 890 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 891 #endif 892 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 893 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 894 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 895 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 896 #if defined (PETSC_USE_CTABLE) 897 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 898 #else 899 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 900 #endif 901 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 902 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 903 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 904 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 905 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 906 ierr = PetscFree(aij);CHKERRQ(ierr); 907 908 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatView_MPIAIJ_Binary" 922 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 923 { 924 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 925 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 926 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 927 PetscErrorCode ierr; 928 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 929 int fd; 930 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 931 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = 
mat->cmap->rstart,rnz; 932 PetscScalar *column_values; 933 934 PetscFunctionBegin; 935 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 936 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 937 nz = A->nz + B->nz; 938 if (!rank) { 939 header[0] = MAT_FILE_CLASSID; 940 header[1] = mat->rmap->N; 941 header[2] = mat->cmap->N; 942 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 943 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 944 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 945 /* get largest number of rows any processor has */ 946 rlen = mat->rmap->n; 947 range = mat->rmap->range; 948 for (i=1; i<size; i++) { 949 rlen = PetscMax(rlen,range[i+1] - range[i]); 950 } 951 } else { 952 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 953 rlen = mat->rmap->n; 954 } 955 956 /* load up the local row counts */ 957 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 958 for (i=0; i<mat->rmap->n; i++) { 959 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 960 } 961 962 /* store the row lengths to the file */ 963 if (!rank) { 964 MPI_Status status; 965 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 966 for (i=1; i<size; i++) { 967 rlen = range[i+1] - range[i]; 968 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 969 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 970 } 971 } else { 972 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 975 976 /* load up the local column indices */ 977 nzmax = nz; /* )th processor needs space a largest processor needs */ 978 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 979 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 980 cnt = 0; 981 for (i=0; i<mat->rmap->n; i++) { 982 for (j=B->i[i]; j<B->i[i+1]; j++) { 983 if ( (col = garray[B->j[j]]) > cstart) break; 984 column_indices[cnt++] = col; 985 } 986 for (k=A->i[i]; k<A->i[i+1]; k++) { 987 column_indices[cnt++] = A->j[k] + cstart; 988 } 989 for (; j<B->i[i+1]; j++) { 990 column_indices[cnt++] = garray[B->j[j]]; 991 } 992 } 993 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 994 995 /* store the column indices to the file */ 996 if (!rank) { 997 MPI_Status status; 998 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 999 for (i=1; i<size; i++) { 1000 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1001 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1002 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1003 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1004 } 1005 } else { 1006 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1007 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1008 } 1009 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1010 1011 /* load up the local column values */ 1012 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1013 
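  /* Pack this process's numerical values in the same global column order used
     for the column indices above: off-diagonal (B) entries whose global column
     comes before the diagonal block, then the diagonal (A) block, then the
     remaining off-diagonal (B) entries. */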
cnt = 0; 1014 for (i=0; i<mat->rmap->n; i++) { 1015 for (j=B->i[i]; j<B->i[i+1]; j++) { 1016 if ( garray[B->j[j]] > cstart) break; 1017 column_values[cnt++] = B->a[j]; 1018 } 1019 for (k=A->i[i]; k<A->i[i+1]; k++) { 1020 column_values[cnt++] = A->a[k]; 1021 } 1022 for (; j<B->i[i+1]; j++) { 1023 column_values[cnt++] = B->a[j]; 1024 } 1025 } 1026 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1027 1028 /* store the column values to the file */ 1029 if (!rank) { 1030 MPI_Status status; 1031 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1032 for (i=1; i<size; i++) { 1033 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1034 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1035 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1036 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1037 } 1038 } else { 1039 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1040 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1041 } 1042 ierr = PetscFree(column_values);CHKERRQ(ierr); 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1048 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1049 { 1050 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1051 PetscErrorCode ierr; 1052 PetscMPIInt rank = aij->rank,size = aij->size; 1053 PetscTruth isdraw,iascii,isbinary; 1054 PetscViewer sviewer; 1055 PetscViewerFormat format; 1056 1057 PetscFunctionBegin; 1058 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1059 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1060 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1061 if (iascii) { 1062 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1063 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1064 MatInfo info; 1065 PetscTruth inodes; 1066 1067 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1068 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1069 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1070 if (!inodes) { 1071 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1072 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1073 } else { 1074 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1075 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1076 } 1077 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1079 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1081 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in 
matrix-vector product: \n");CHKERRQ(ierr); 1083 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1086 PetscInt inodecount,inodelimit,*inodes; 1087 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1088 if (inodes) { 1089 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1090 } else { 1091 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1095 PetscFunctionReturn(0); 1096 } 1097 } else if (isbinary) { 1098 if (size == 1) { 1099 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1100 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1101 } else { 1102 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1103 } 1104 PetscFunctionReturn(0); 1105 } else if (isdraw) { 1106 PetscDraw draw; 1107 PetscTruth isnull; 1108 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1109 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1110 } 1111 1112 if (size == 1) { 1113 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1114 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1115 } else { 1116 /* assemble the entire matrix onto first processor. */ 1117 Mat A; 1118 Mat_SeqAIJ *Aloc; 1119 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1120 MatScalar *a; 1121 1122 if (mat->rmap->N > 1024) { 1123 PetscTruth flg = PETSC_FALSE; 1124 1125 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1126 if (!flg) { 1127 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1128 } 1129 } 1130 1131 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1132 if (!rank) { 1133 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1134 } else { 1135 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1136 } 1137 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1138 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1139 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1140 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1141 1142 /* copy over the A part */ 1143 Aloc = (Mat_SeqAIJ*)aij->A->data; 1144 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1145 row = mat->rmap->rstart; 1146 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1147 for (i=0; i<m; i++) { 1148 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1149 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1150 } 1151 aj = Aloc->j; 1152 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1153 1154 /* copy over the B part */ 1155 Aloc = (Mat_SeqAIJ*)aij->B->data; 1156 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1157 row = mat->rmap->rstart; 1158 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1159 ct = cols; 1160 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1161 for (i=0; i<m; i++) { 1162 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1163 row++; a += ai[i+1]-ai[i]; 
cols += ai[i+1]-ai[i]; 1164 } 1165 ierr = PetscFree(ct);CHKERRQ(ierr); 1166 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1167 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1168 /* 1169 Everyone has to call to draw the matrix since the graphics waits are 1170 synchronized across all processors that share the PetscDraw object 1171 */ 1172 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1173 if (!rank) { 1174 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1175 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1176 } 1177 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178 ierr = MatDestroy(A);CHKERRQ(ierr); 1179 } 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatView_MPIAIJ" 1185 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1186 { 1187 PetscErrorCode ierr; 1188 PetscTruth iascii,isdraw,issocket,isbinary; 1189 1190 PetscFunctionBegin; 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1193 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1194 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1195 if (iascii || isdraw || isbinary || issocket) { 1196 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1197 } else { 1198 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1199 } 1200 PetscFunctionReturn(0); 1201 } 1202 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatSOR_MPIAIJ" 1205 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1206 { 1207 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1208 PetscErrorCode ierr; 1209 Vec bb1 = 0; 1210 PetscTruth hasop; 1211 1212 PetscFunctionBegin; 1213 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1214 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1215 } 1216 1217 if (flag == SOR_APPLY_UPPER) { 1218 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1223 if (flag & SOR_ZERO_INITIAL_GUESS) { 1224 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1225 its--; 1226 } 1227 1228 while (its--) { 1229 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 1232 /* update rhs: bb1 = bb - B*x */ 1233 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1234 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1235 1236 /* local sweep */ 1237 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1238 } 1239 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1240 if (flag & SOR_ZERO_INITIAL_GUESS) { 1241 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1242 its--; 1243 } 1244 while (its--) { 1245 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 1248 /* 
update rhs: bb1 = bb - B*x */ 1249 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1250 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1251 1252 /* local sweep */ 1253 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1254 } 1255 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1256 if (flag & SOR_ZERO_INITIAL_GUESS) { 1257 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1258 its--; 1259 } 1260 while (its--) { 1261 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 1264 /* update rhs: bb1 = bb - B*x */ 1265 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1266 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1267 1268 /* local sweep */ 1269 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1270 } 1271 } else if (flag & SOR_EISENSTAT) { 1272 Vec xx1; 1273 1274 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1275 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1276 1277 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 if (!mat->diag) { 1280 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1281 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1282 } 1283 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1284 if (hasop) { 1285 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1286 } else { 1287 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1288 } 1289 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1290 1291 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1292 1293 /* local sweep */ 1294 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1295 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1296 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1297 } else { 1298 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1299 } 1300 1301 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatPermute_MPIAIJ" 1307 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1308 { 1309 MPI_Comm comm,pcomm; 1310 PetscInt first,local_size,nrows; 1311 const PetscInt *rows; 1312 PetscMPIInt size; 1313 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1318 /* make a collective version of 'rowp' */ 1319 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1320 if (pcomm==comm) { 1321 crowp = rowp; 1322 } else { 1323 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1324 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1325 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1326 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1327 } 1328 /* collect the global row permutation and invert it */ 1329 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1330 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1331 if (pcomm!=comm) { 1332 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1333 } 1334 ierr = 
ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1335 /* get the local target indices */ 1336 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1337 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1338 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1339 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1340 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1341 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1342 /* the column permutation is so much easier; 1343 make a local version of 'colp' and invert it */ 1344 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1345 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1346 if (size==1) { 1347 lcolp = colp; 1348 } else { 1349 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1350 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1351 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1352 } 1353 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1354 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1355 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1356 if (size>1) { 1357 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1358 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1359 } 1360 /* now we just get the submatrix */ 1361 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1362 /* clean up */ 1363 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1364 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1370 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1371 { 1372 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1373 Mat A = mat->A,B = mat->B; 1374 PetscErrorCode ierr; 1375 PetscReal isend[5],irecv[5]; 1376 1377 PetscFunctionBegin; 1378 info->block_size = 1.0; 1379 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1380 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1381 isend[3] = info->memory; isend[4] = info->mallocs; 1382 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1383 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1384 isend[3] += info->memory; isend[4] += info->mallocs; 1385 if (flag == MAT_LOCAL) { 1386 info->nz_used = isend[0]; 1387 info->nz_allocated = isend[1]; 1388 info->nz_unneeded = isend[2]; 1389 info->memory = isend[3]; 1390 info->mallocs = isend[4]; 1391 } else if (flag == MAT_GLOBAL_MAX) { 1392 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1393 info->nz_used = irecv[0]; 1394 info->nz_allocated = irecv[1]; 1395 info->nz_unneeded = irecv[2]; 1396 info->memory = irecv[3]; 1397 info->mallocs = irecv[4]; 1398 } else if (flag == MAT_GLOBAL_SUM) { 1399 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1400 info->nz_used = irecv[0]; 1401 info->nz_allocated = irecv[1]; 1402 info->nz_unneeded = irecv[2]; 1403 info->memory = irecv[3]; 1404 info->mallocs = irecv[4]; 1405 } 1406 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1407 info->fill_ratio_needed = 0; 1408 info->factor_mallocs = 0; 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatSetOption_MPIAIJ" 1415 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1416 { 1417 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 
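  /* Most options are simply forwarded to both the diagonal (A) and off-diagonal (B)
     sequential blocks; the symmetry-related options only apply to the square
     diagonal block, and MAT_IGNORE_OFF_PROC_ENTRIES disables stashing of
     off-process entries. */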
switch (op) { 1422 case MAT_NEW_NONZERO_LOCATIONS: 1423 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1424 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1425 case MAT_KEEP_NONZERO_PATTERN: 1426 case MAT_NEW_NONZERO_LOCATION_ERR: 1427 case MAT_USE_INODES: 1428 case MAT_IGNORE_ZERO_ENTRIES: 1429 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1430 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1431 break; 1432 case MAT_ROW_ORIENTED: 1433 a->roworiented = flg; 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_NEW_DIAGONALS: 1438 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1439 break; 1440 case MAT_IGNORE_OFF_PROC_ENTRIES: 1441 a->donotstash = PETSC_TRUE; 1442 break; 1443 case MAT_SYMMETRIC: 1444 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1445 break; 1446 case MAT_STRUCTURALLY_SYMMETRIC: 1447 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1448 break; 1449 case MAT_HERMITIAN: 1450 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1451 break; 1452 case MAT_SYMMETRY_ETERNAL: 1453 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1454 break; 1455 default: 1456 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatGetRow_MPIAIJ" 1463 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1464 { 1465 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1466 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1467 PetscErrorCode ierr; 1468 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1469 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1470 PetscInt *cmap,*idx_p; 1471 1472 PetscFunctionBegin; 1473 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1474 mat->getrowactive = PETSC_TRUE; 1475 1476 if (!mat->rowvalues && (idx || v)) { 1477 /* 1478 allocate enough space to hold information from the longest row. 
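         The work arrays mat->rowvalues and mat->rowindices are allocated only once and
         then reused by every later MatGetRow() on this matrix, so a single pass over the
         row lengths of the diagonal and off-diagonal blocks here avoids an allocation
         per call.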
1479 */ 1480 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1481 PetscInt max = 1,tmp; 1482 for (i=0; i<matin->rmap->n; i++) { 1483 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1484 if (max < tmp) { max = tmp; } 1485 } 1486 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1487 } 1488 1489 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1490 lrow = row - rstart; 1491 1492 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1493 if (!v) {pvA = 0; pvB = 0;} 1494 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1495 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1497 nztot = nzA + nzB; 1498 1499 cmap = mat->garray; 1500 if (v || idx) { 1501 if (nztot) { 1502 /* Sort by increasing column numbers, assuming A and B already sorted */ 1503 PetscInt imark = -1; 1504 if (v) { 1505 *v = v_p = mat->rowvalues; 1506 for (i=0; i<nzB; i++) { 1507 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1508 else break; 1509 } 1510 imark = i; 1511 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1512 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1513 } 1514 if (idx) { 1515 *idx = idx_p = mat->rowindices; 1516 if (imark > -1) { 1517 for (i=0; i<imark; i++) { 1518 idx_p[i] = cmap[cworkB[i]]; 1519 } 1520 } else { 1521 for (i=0; i<nzB; i++) { 1522 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1523 else break; 1524 } 1525 imark = i; 1526 } 1527 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1528 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1529 } 1530 } else { 1531 if (idx) *idx = 0; 1532 if (v) *v = 0; 1533 } 1534 } 1535 *nz = nztot; 1536 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1537 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1543 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1544 { 1545 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1546 1547 PetscFunctionBegin; 1548 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1549 aij->getrowactive = PETSC_FALSE; 1550 PetscFunctionReturn(0); 1551 } 1552 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatNorm_MPIAIJ" 1555 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1556 { 1557 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1558 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1559 PetscErrorCode ierr; 1560 PetscInt i,j,cstart = mat->cmap->rstart; 1561 PetscReal sum = 0.0; 1562 MatScalar *v; 1563 1564 PetscFunctionBegin; 1565 if (aij->size == 1) { 1566 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1567 } else { 1568 if (type == NORM_FROBENIUS) { 1569 v = amat->a; 1570 for (i=0; i<amat->nz; i++) { 1571 #if defined(PETSC_USE_COMPLEX) 1572 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1573 #else 1574 sum += (*v)*(*v); v++; 1575 #endif 1576 } 1577 v = bmat->a; 1578 for (i=0; i<bmat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1586 *norm = sqrt(*norm); 1587 } else if (type 
== NORM_1) { /* max column norm */ 1588 PetscReal *tmp,*tmp2; 1589 PetscInt *jj,*garray = aij->garray; 1590 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1591 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1592 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1593 *norm = 0.0; 1594 v = amat->a; jj = amat->j; 1595 for (j=0; j<amat->nz; j++) { 1596 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1597 } 1598 v = bmat->a; jj = bmat->j; 1599 for (j=0; j<bmat->nz; j++) { 1600 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1601 } 1602 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1603 for (j=0; j<mat->cmap->N; j++) { 1604 if (tmp2[j] > *norm) *norm = tmp2[j]; 1605 } 1606 ierr = PetscFree(tmp);CHKERRQ(ierr); 1607 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1608 } else if (type == NORM_INFINITY) { /* max row norm */ 1609 PetscReal ntemp = 0.0; 1610 for (j=0; j<aij->A->rmap->n; j++) { 1611 v = amat->a + amat->i[j]; 1612 sum = 0.0; 1613 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1614 sum += PetscAbsScalar(*v); v++; 1615 } 1616 v = bmat->a + bmat->i[j]; 1617 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1618 sum += PetscAbsScalar(*v); v++; 1619 } 1620 if (sum > ntemp) ntemp = sum; 1621 } 1622 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1623 } else { 1624 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1625 } 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatTranspose_MPIAIJ" 1632 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1633 { 1634 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1635 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1636 PetscErrorCode ierr; 1637 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1638 PetscInt cstart=A->cmap->rstart,ncol; 1639 Mat B; 1640 MatScalar *array; 1641 1642 PetscFunctionBegin; 1643 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1644 1645 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1646 ai = Aloc->i; aj = Aloc->j; 1647 bi = Bloc->i; bj = Bloc->j; 1648 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1649 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1650 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1651 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1652 for (i=0; i<ai[ma]; i++){ 1653 d_nnz[aj[i]] ++; 1654 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1655 } 1656 1657 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1658 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1659 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1660 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1661 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1662 } else { 1663 B = *matout; 1664 } 1665 1666 /* copy over the A part */ 1667 array = Aloc->a; 1668 row = A->rmap->rstart; 1669 for (i=0; i<ma; i++) { 1670 ncol = ai[i+1]-ai[i]; 1671 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1672 row++; array += ncol; aj += ncol; 1673 } 1674 aj = Aloc->j; 1675 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1676 1677 /* copy over the B part */ 1678 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1679 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1680 array = Bloc->a; 1681 row = A->rmap->rstart; 1682 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1683 cols_tmp = cols; 1684 for (i=0; i<mb; i++) { 1685 ncol = bi[i+1]-bi[i]; 1686 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1687 row++; array += ncol; cols_tmp += ncol; 1688 } 1689 ierr = PetscFree(cols);CHKERRQ(ierr); 1690 1691 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1692 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1694 *matout = B; 1695 } else { 1696 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1703 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1704 { 1705 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1706 Mat a = aij->A,b = aij->B; 1707 PetscErrorCode ierr; 1708 PetscInt s1,s2,s3; 1709 1710 PetscFunctionBegin; 1711 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1712 if (rr) { 1713 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1714 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1715 /* Overlap communication with computation. */ 1716 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1717 } 1718 if (ll) { 1719 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1720 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1721 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1722 } 1723 /* scale the diagonal block */ 1724 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1725 1726 if (rr) { 1727 /* Do a scatter end and then right scale the off-diagonal block */ 1728 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1729 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1730 } 1731 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1737 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1738 { 1739 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1740 PetscErrorCode ierr; 1741 1742 PetscFunctionBegin; 1743 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1744 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1745 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1746 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1747 PetscFunctionReturn(0); 1748 } 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1751 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1752 { 1753 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1754 PetscErrorCode ierr; 1755 1756 PetscFunctionBegin; 1757 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatEqual_MPIAIJ" 1763 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1764 { 1765 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1766 Mat a,b,c,d; 1767 PetscTruth flg; 1768 PetscErrorCode ierr; 1769 1770 PetscFunctionBegin; 1771 a = matA->A; b = matA->B; 1772 c = matB->A; d = matB->B; 1773 1774 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1775 if (flg) { 1776 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1777 } 1778 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1779 
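  /*
     Each process has compared only its own diagonal (A) and off-diagonal (B) blocks;
     the MPI_LAND reduction above makes the result collective, so *flag is PETSC_TRUE
     only if every process found both of its blocks equal.
  */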
PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatCopy_MPIAIJ" 1784 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1785 { 1786 PetscErrorCode ierr; 1787 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1788 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1789 1790 PetscFunctionBegin; 1791 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1792 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1793 /* because of the column compression in the off-processor part of the matrix a->B, 1794 the number of columns in a->B and b->B may be different, hence we cannot call 1795 the MatCopy() directly on the two parts. If need be, we can provide a more 1796 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1797 then copying the submatrices */ 1798 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1799 } else { 1800 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1801 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1802 } 1803 PetscFunctionReturn(0); 1804 } 1805 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1808 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1809 { 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatAXPY_MPIAIJ" 1819 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1820 { 1821 PetscErrorCode ierr; 1822 PetscInt i; 1823 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1824 PetscBLASInt bnz,one=1; 1825 Mat_SeqAIJ *x,*y; 1826 1827 PetscFunctionBegin; 1828 if (str == SAME_NONZERO_PATTERN) { 1829 PetscScalar alpha = a; 1830 x = (Mat_SeqAIJ *)xx->A->data; 1831 y = (Mat_SeqAIJ *)yy->A->data; 1832 bnz = PetscBLASIntCast(x->nz); 1833 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1834 x = (Mat_SeqAIJ *)xx->B->data; 1835 y = (Mat_SeqAIJ *)yy->B->data; 1836 bnz = PetscBLASIntCast(x->nz); 1837 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1838 } else if (str == SUBSET_NONZERO_PATTERN) { 1839 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1840 1841 x = (Mat_SeqAIJ *)xx->B->data; 1842 y = (Mat_SeqAIJ *)yy->B->data; 1843 if (y->xtoy && y->XtoY != xx->B) { 1844 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1845 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1846 } 1847 if (!y->xtoy) { /* get xtoy */ 1848 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1849 y->XtoY = xx->B; 1850 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1851 } 1852 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1853 } else { 1854 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatConjugate_MPIAIJ" 1863 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1864 { 1865 #if defined(PETSC_USE_COMPLEX) 1866 PetscErrorCode ierr; 1867 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1868 1869 PetscFunctionBegin; 1870 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1871 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1872 #else 1873 PetscFunctionBegin; 1874 #endif 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatRealPart_MPIAIJ" 1880 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1881 { 1882 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1887 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1893 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1894 { 1895 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1900 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #ifdef PETSC_HAVE_PBGL 1905 1906 #include <boost/parallel/mpi/bsp_process_group.hpp> 1907 #include <boost/graph/distributed/ilu_default_graph.hpp> 1908 #include <boost/graph/distributed/ilu_0_block.hpp> 1909 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1910 #include <boost/graph/distributed/petsc/interface.hpp> 1911 #include <boost/multi_array.hpp> 1912 #include <boost/parallel/distributed_property_map->hpp> 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1916 /* 1917 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1918 */ 1919 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1920 { 1921 namespace petsc = boost::distributed::petsc; 1922 1923 namespace graph_dist = boost::graph::distributed; 1924 using boost::graph::distributed::ilu_default::process_group_type; 1925 using boost::graph::ilu_permuted; 1926 1927 PetscTruth row_identity, col_identity; 1928 PetscContainer c; 1929 PetscInt m, n, M, N; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1934 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1935 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1936 if (!row_identity || !col_identity) { 1937 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1938 } 1939 1940 process_group_type pg; 1941 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1942 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1943 lgraph_type& level_graph = *lgraph_p; 1944 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1945 1946 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1947 ilu_permuted(level_graph); 1948 1949 /* put together the new matrix */ 1950 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1951 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1952 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1953 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1954 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1955 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 1958 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1959 ierr = PetscContainerSetPointer(c, lgraph_p); 1960 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1966 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1967 { 1968 PetscFunctionBegin; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 
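/*
   MatLUFactorNumeric_MPIAIJ() above is a no-op because, with PBGL, the factors are
   already computed inside MatILUFactorSymbolic_MPIAIJ() by ilu_permuted(); the solve
   below simply applies them through ilu_set_solve().
*/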
#define __FUNCT__ "MatSolve_MPIAIJ" 1974 /* 1975 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1976 */ 1977 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1978 { 1979 namespace graph_dist = boost::graph::distributed; 1980 1981 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1982 lgraph_type* lgraph_p; 1983 PetscContainer c; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1988 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1989 ierr = VecCopy(b, x);CHKERRQ(ierr); 1990 1991 PetscScalar* array_x; 1992 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1993 PetscInt sx; 1994 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1995 1996 PetscScalar* array_b; 1997 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1998 PetscInt sb; 1999 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2000 2001 lgraph_type& level_graph = *lgraph_p; 2002 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2003 2004 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2005 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2006 ref_x(array_x, boost::extents[num_vertices(graph)]); 2007 2008 typedef boost::iterator_property_map<array_ref_type::iterator, 2009 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2010 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2011 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2012 2013 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2014 2015 PetscFunctionReturn(0); 2016 } 2017 #endif 2018 2019 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2020 PetscInt nzlocal,nsends,nrecvs; 2021 PetscMPIInt *send_rank,*recv_rank; 2022 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2023 PetscScalar *sbuf_a,**rbuf_a; 2024 PetscErrorCode (*MatDestroy)(Mat); 2025 } Mat_Redundant; 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2029 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2030 { 2031 PetscErrorCode ierr; 2032 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2033 PetscInt i; 2034 2035 PetscFunctionBegin; 2036 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2037 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2038 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2039 for (i=0; i<redund->nrecvs; i++){ 2040 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2041 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2042 } 2043 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2044 ierr = PetscFree(redund);CHKERRQ(ierr); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatDestroy_MatRedundant" 2050 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2051 { 2052 PetscErrorCode ierr; 2053 PetscContainer container; 2054 Mat_Redundant *redund=PETSC_NULL; 2055 2056 PetscFunctionBegin; 2057 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2058 if (container) { 2059 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2060 } else { 2061 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2062 } 2063 A->ops->destroy = redund->MatDestroy; 2064 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2065 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2066 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2072 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2073 { 2074 PetscMPIInt rank,size; 2075 MPI_Comm comm=((PetscObject)mat)->comm; 2076 PetscErrorCode ierr; 2077 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2078 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2079 PetscInt *rowrange=mat->rmap->range; 2080 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2081 Mat A=aij->A,B=aij->B,C=*matredundant; 2082 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2083 PetscScalar *sbuf_a; 2084 PetscInt nzlocal=a->nz+b->nz; 2085 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2086 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2087 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2088 MatScalar *aworkA,*aworkB; 2089 PetscScalar *vals; 2090 PetscMPIInt tag1,tag2,tag3,imdex; 2091 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2092 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2093 MPI_Status recv_status,*send_status; 2094 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2095 PetscInt **rbuf_j=PETSC_NULL; 2096 PetscScalar **rbuf_a=PETSC_NULL; 2097 Mat_Redundant *redund=PETSC_NULL; 2098 PetscContainer container; 2099 2100 PetscFunctionBegin; 2101 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2102 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2103 2104 if (reuse == MAT_REUSE_MATRIX) { 2105 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2106 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2107 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2108 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2109 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2110 if (container) { 2111 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2112 } else { 2113 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2114 } 2115 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2116 2117 nsends = redund->nsends; 2118 nrecvs = redund->nrecvs; 2119 send_rank = redund->send_rank; 2120 recv_rank = redund->recv_rank; 2121 sbuf_nz = redund->sbuf_nz; 2122 rbuf_nz = redund->rbuf_nz; 2123 sbuf_j = redund->sbuf_j; 2124 sbuf_a = redund->sbuf_a; 2125 rbuf_j = redund->rbuf_j; 2126 rbuf_a = redund->rbuf_a; 2127 } 2128 2129 if (reuse == MAT_INITIAL_MATRIX){ 2130 PetscMPIInt subrank,subsize; 2131 PetscInt nleftover,np_subcomm; 2132 /* get the destination processors' id send_rank, nsends and nrecvs */ 2133 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2134 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2135 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2136 np_subcomm = size/nsubcomm; 2137 nleftover = size - nsubcomm*np_subcomm; 2138 nsends = 0; nrecvs = 0; 2139 for (i=0; i<size; i++){ /* i=rank*/ 2140 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2141 send_rank[nsends] = i; nsends++; 2142 recv_rank[nrecvs++] = i; 2143 } 2144 } 2145 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2146 i = size-nleftover-1; 2147 j = 0; 2148 while (j < nsubcomm - nleftover){ 2149 send_rank[nsends++] = i; 2150 i--; j++; 2151 } 2152 } 2153 2154 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2155 for (i=0; i<nleftover; i++){ 2156 recv_rank[nrecvs++] = size-nleftover+i; 2157 } 2158 } 2159 2160 /* allocate sbuf_j, sbuf_a */ 2161 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2162 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2163 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2164 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2165 2166 /* copy mat's local entries into the buffers */ 2167 if (reuse == MAT_INITIAL_MATRIX){ 2168 rownz_max = 0; 2169 rptr = sbuf_j; 2170 cols = sbuf_j + rend-rstart + 1; 2171 vals = sbuf_a; 2172 rptr[0] = 0; 2173 for (i=0; i<rend-rstart; i++){ 2174 row = i + rstart; 2175 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2176 ncols = nzA + nzB; 2177 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2178 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2179 /* load the column indices for this row into cols */ 2180 lwrite = 0; 2181 for (l=0; l<nzB; l++) { 2182 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2183 vals[lwrite] = aworkB[l]; 2184 cols[lwrite++] = ctmp; 2185 } 2186 } 2187 for (l=0; l<nzA; l++){ 2188 vals[lwrite] = aworkA[l]; 2189 cols[lwrite++] = cstart + cworkA[l]; 2190 } 2191 for (l=0; l<nzB; l++) { 2192 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2193 vals[lwrite] = aworkB[l]; 2194 cols[lwrite++] = ctmp; 2195 } 2196 } 2197 vals += ncols; 2198 cols += ncols; 2199 rptr[i+1] = rptr[i] + ncols; 2200 if (rownz_max < ncols) rownz_max = ncols; 2201 } 2202 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2203 } else { /* only copy matrix values into sbuf_a */ 2204 rptr = sbuf_j; 2205 vals = sbuf_a; 2206 rptr[0] = 0; 2207 for (i=0; i<rend-rstart; i++){ 2208 row = i + rstart; 2209 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2210 ncols = nzA + nzB; 2211 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2212 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2213 lwrite = 0; 2214 for (l=0; l<nzB; l++) { 2215 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2216 } 2217 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2218 for 
(l=0; l<nzB; l++) { 2219 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2220 } 2221 vals += ncols; 2222 rptr[i+1] = rptr[i] + ncols; 2223 } 2224 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2225 2226 /* send nzlocal to others, and recv other's nzlocal */ 2227 /*--------------------------------------------------*/ 2228 if (reuse == MAT_INITIAL_MATRIX){ 2229 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2230 s_waits2 = s_waits3 + nsends; 2231 s_waits1 = s_waits2 + nsends; 2232 r_waits1 = s_waits1 + nsends; 2233 r_waits2 = r_waits1 + nrecvs; 2234 r_waits3 = r_waits2 + nrecvs; 2235 } else { 2236 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2237 r_waits3 = s_waits3 + nsends; 2238 } 2239 2240 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2241 if (reuse == MAT_INITIAL_MATRIX){ 2242 /* get new tags to keep the communication clean */ 2243 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2244 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2245 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2246 2247 /* post receives of other's nzlocal */ 2248 for (i=0; i<nrecvs; i++){ 2249 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2250 } 2251 /* send nzlocal to others */ 2252 for (i=0; i<nsends; i++){ 2253 sbuf_nz[i] = nzlocal; 2254 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2255 } 2256 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2257 count = nrecvs; 2258 while (count) { 2259 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2260 recv_rank[imdex] = recv_status.MPI_SOURCE; 2261 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2262 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2263 2264 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2265 rbuf_nz[imdex] += i + 2; 2266 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2267 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2268 count--; 2269 } 2270 /* wait on sends of nzlocal */ 2271 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2272 /* send mat->i,j to others, and recv from other's */ 2273 /*------------------------------------------------*/ 2274 for (i=0; i<nsends; i++){ 2275 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2276 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2277 } 2278 /* wait on receives of mat->i,j */ 2279 /*------------------------------*/ 2280 count = nrecvs; 2281 while (count) { 2282 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2283 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2284 count--; 2285 } 2286 /* wait on sends of mat->i,j */ 2287 /*---------------------------*/ 2288 if (nsends) { 2289 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2290 } 2291 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2292 2293 /* post receives, send and receive mat->a */ 2294 
/*----------------------------------------*/ 2295 for (imdex=0; imdex<nrecvs; imdex++) { 2296 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2297 } 2298 for (i=0; i<nsends; i++){ 2299 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2300 } 2301 count = nrecvs; 2302 while (count) { 2303 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2304 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2305 count--; 2306 } 2307 if (nsends) { 2308 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2309 } 2310 2311 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2312 2313 /* create redundant matrix */ 2314 /*-------------------------*/ 2315 if (reuse == MAT_INITIAL_MATRIX){ 2316 /* compute rownz_max for preallocation */ 2317 for (imdex=0; imdex<nrecvs; imdex++){ 2318 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2319 rptr = rbuf_j[imdex]; 2320 for (i=0; i<j; i++){ 2321 ncols = rptr[i+1] - rptr[i]; 2322 if (rownz_max < ncols) rownz_max = ncols; 2323 } 2324 } 2325 2326 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2327 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2328 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2329 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2330 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2331 } else { 2332 C = *matredundant; 2333 } 2334 2335 /* insert local matrix entries */ 2336 rptr = sbuf_j; 2337 cols = sbuf_j + rend-rstart + 1; 2338 vals = sbuf_a; 2339 for (i=0; i<rend-rstart; i++){ 2340 row = i + rstart; 2341 ncols = rptr[i+1] - rptr[i]; 2342 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2343 vals += ncols; 2344 cols += ncols; 2345 } 2346 /* insert received matrix entries */ 2347 for (imdex=0; imdex<nrecvs; imdex++){ 2348 rstart = rowrange[recv_rank[imdex]]; 2349 rend = rowrange[recv_rank[imdex]+1]; 2350 rptr = rbuf_j[imdex]; 2351 cols = rbuf_j[imdex] + rend-rstart + 1; 2352 vals = rbuf_a[imdex]; 2353 for (i=0; i<rend-rstart; i++){ 2354 row = i + rstart; 2355 ncols = rptr[i+1] - rptr[i]; 2356 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2357 vals += ncols; 2358 cols += ncols; 2359 } 2360 } 2361 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2362 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2363 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2364 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2365 if (reuse == MAT_INITIAL_MATRIX){ 2366 PetscContainer container; 2367 *matredundant = C; 2368 /* create a supporting struct and attach it to C for reuse */ 2369 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2370 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2371 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2372 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2373 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2374 2375 redund->nzlocal = nzlocal; 2376 redund->nsends = nsends; 2377 redund->nrecvs = nrecvs; 2378 redund->send_rank = send_rank; 2379 redund->recv_rank = recv_rank; 
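      /*
         Everything stashed in redund (ranks, counts, index and value buffers) stays
         attached to C through the "Mat_Redundant" container, so a later call with
         MAT_REUSE_MATRIX skips the index exchange and only re-sends numerical values.
         A usage sketch (hypothetical nsubcomm/subcomm/mlocal_sub prepared by the caller,
         as done for example by the redundant preconditioner):

            ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
            ... update entries of mat, keeping the same nonzero pattern ...
            ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
      */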
redund->sbuf_nz = sbuf_nz;
    redund->rbuf_nz = rbuf_nz;
    redund->sbuf_j  = sbuf_j;
    redund->sbuf_a  = sbuf_a;
    redund->rbuf_j  = rbuf_j;
    redund->rbuf_a  = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
MatSOR_MPIAIJ, 2593 MatTranspose_MPIAIJ, 2594 /*15*/ MatGetInfo_MPIAIJ, 2595 MatEqual_MPIAIJ, 2596 MatGetDiagonal_MPIAIJ, 2597 MatDiagonalScale_MPIAIJ, 2598 MatNorm_MPIAIJ, 2599 /*20*/ MatAssemblyBegin_MPIAIJ, 2600 MatAssemblyEnd_MPIAIJ, 2601 MatSetOption_MPIAIJ, 2602 MatZeroEntries_MPIAIJ, 2603 /*24*/ MatZeroRows_MPIAIJ, 2604 0, 2605 #ifdef PETSC_HAVE_PBGL 2606 0, 2607 #else 2608 0, 2609 #endif 2610 0, 2611 0, 2612 /*29*/ MatSetUpPreallocation_MPIAIJ, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 0, 2621 /*34*/ MatDuplicate_MPIAIJ, 2622 0, 2623 0, 2624 0, 2625 0, 2626 /*39*/ MatAXPY_MPIAIJ, 2627 MatGetSubMatrices_MPIAIJ, 2628 MatIncreaseOverlap_MPIAIJ, 2629 MatGetValues_MPIAIJ, 2630 MatCopy_MPIAIJ, 2631 /*44*/ MatGetRowMax_MPIAIJ, 2632 MatScale_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 /*49*/ MatSetBlockSize_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*54*/ MatFDColoringCreate_MPIAIJ, 2642 0, 2643 MatSetUnfactored_MPIAIJ, 2644 MatPermute_MPIAIJ, 2645 0, 2646 /*59*/ MatGetSubMatrix_MPIAIJ, 2647 MatDestroy_MPIAIJ, 2648 MatView_MPIAIJ, 2649 0, 2650 0, 2651 /*64*/ 0, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2657 MatGetRowMinAbs_MPIAIJ, 2658 0, 2659 MatSetColoring_MPIAIJ, 2660 #if defined(PETSC_HAVE_ADIC) 2661 MatSetValuesAdic_MPIAIJ, 2662 #else 2663 0, 2664 #endif 2665 MatSetValuesAdifor_MPIAIJ, 2666 /*75*/ MatFDColoringApply_AIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*80*/ 0, 2672 0, 2673 0, 2674 /*83*/ MatLoad_MPIAIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2681 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2682 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2683 MatPtAP_Basic, 2684 MatPtAPSymbolic_MPIAIJ, 2685 /*94*/ MatPtAPNumeric_MPIAIJ, 2686 0, 2687 0, 2688 0, 2689 0, 2690 /*99*/ 0, 2691 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2692 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2693 MatConjugate_MPIAIJ, 2694 0, 2695 /*104*/MatSetValuesRow_MPIAIJ, 2696 MatRealPart_MPIAIJ, 2697 MatImaginaryPart_MPIAIJ, 2698 0, 2699 0, 2700 /*109*/0, 2701 MatGetRedundantMatrix_MPIAIJ, 2702 MatGetRowMin_MPIAIJ, 2703 0, 2704 0, 2705 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2706 0, 2707 0, 2708 0, 2709 0, 2710 0 2711 }; 2712 2713 /* ----------------------------------------------------------------------------------------*/ 2714 2715 EXTERN_C_BEGIN 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2718 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2719 { 2720 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2725 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2726 PetscFunctionReturn(0); 2727 } 2728 EXTERN_C_END 2729 2730 EXTERN_C_BEGIN 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2733 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2734 { 2735 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2736 PetscErrorCode ierr; 2737 2738 PetscFunctionBegin; 2739 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2740 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 EXTERN_C_END 2744 2745 #include "petscpc.h" 2746 EXTERN_C_BEGIN 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2749 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2750 { 2751 Mat_MPIAIJ *b; 2752 PetscErrorCode ierr; 2753 PetscInt i; 2754 2755 PetscFunctionBegin; 2756 
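  /*
     d_nz/d_nnz preallocate the diagonal block (columns owned by this process) and
     o_nz/o_nnz the off-diagonal block; PETSC_DEFAULT falls back to the fixed per-row
     guesses just below.  A minimal caller-side sketch (hypothetical matrix with 3 local
     rows; the counts are purely illustrative):

        Mat      A;
        PetscInt d_nnz[3] = {3,2,3}, o_nnz[3] = {1,0,2};
        ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  */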
if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2757 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2758 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2759 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2760 2761 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2762 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2763 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2765 if (d_nnz) { 2766 for (i=0; i<B->rmap->n; i++) { 2767 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2768 } 2769 } 2770 if (o_nnz) { 2771 for (i=0; i<B->rmap->n; i++) { 2772 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2773 } 2774 } 2775 b = (Mat_MPIAIJ*)B->data; 2776 2777 if (!B->preallocated) { 2778 /* Explicitly create 2 MATSEQAIJ matrices. */ 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2781 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2782 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2783 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2784 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2785 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2786 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2787 } 2788 2789 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2790 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2791 B->preallocated = PETSC_TRUE; 2792 PetscFunctionReturn(0); 2793 } 2794 EXTERN_C_END 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2798 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2799 { 2800 Mat mat; 2801 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2802 PetscErrorCode ierr; 2803 2804 PetscFunctionBegin; 2805 *newmat = 0; 2806 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2807 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2808 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2809 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2810 a = (Mat_MPIAIJ*)mat->data; 2811 2812 mat->factortype = matin->factortype; 2813 mat->rmap->bs = matin->rmap->bs; 2814 mat->assembled = PETSC_TRUE; 2815 mat->insertmode = NOT_SET_VALUES; 2816 mat->preallocated = PETSC_TRUE; 2817 2818 a->size = oldmat->size; 2819 a->rank = oldmat->rank; 2820 a->donotstash = oldmat->donotstash; 2821 a->roworiented = oldmat->roworiented; 2822 a->rowindices = 0; 2823 a->rowvalues = 0; 2824 a->getrowactive = PETSC_FALSE; 2825 2826 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2827 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2828 2829 if (oldmat->colmap) { 2830 #if defined (PETSC_USE_CTABLE) 2831 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2832 #else 2833 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2834 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2835 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2836 #endif 
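  /* (with PETSC_USE_CTABLE the global-to-local column map just copied is a hash table;
      otherwise it is a dense array of length cmap->N) */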
2837 } else a->colmap = 0; 2838 if (oldmat->garray) { 2839 PetscInt len; 2840 len = oldmat->B->cmap->n; 2841 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2842 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2843 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2844 } else a->garray = 0; 2845 2846 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2848 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2850 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2852 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2854 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2855 *newmat = mat; 2856 PetscFunctionReturn(0); 2857 } 2858 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "MatLoad_MPIAIJ" 2861 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2862 { 2863 Mat A; 2864 PetscScalar *vals,*svals; 2865 MPI_Comm comm = ((PetscObject)viewer)->comm; 2866 MPI_Status status; 2867 PetscErrorCode ierr; 2868 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2869 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2870 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2871 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2872 PetscInt cend,cstart,n,*rowners; 2873 int fd; 2874 2875 PetscFunctionBegin; 2876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2878 if (!rank) { 2879 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2880 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2881 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2882 } 2883 2884 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2885 M = header[1]; N = header[2]; 2886 /* determine ownership of all rows */ 2887 m = M/size + ((M % size) > rank); 2888 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2889 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2890 2891 /* First process needs enough room for process with most rows */ 2892 if (!rank) { 2893 mmax = rowners[1]; 2894 for (i=2; i<size; i++) { 2895 mmax = PetscMax(mmax,rowners[i]); 2896 } 2897 } else mmax = m; 2898 2899 rowners[0] = 0; 2900 for (i=2; i<=size; i++) { 2901 rowners[i] += rowners[i-1]; 2902 } 2903 rstart = rowners[rank]; 2904 rend = rowners[rank+1]; 2905 2906 /* distribute row lengths to all processors */ 2907 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2908 if (!rank) { 2909 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2910 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2911 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2912 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2913 for (j=0; j<m; j++) { 2914 procsnz[0] += ourlens[j]; 2915 } 2916 for (i=1; i<size; i++) { 2917 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2918 /* calculate the number of nonzeros on each processor */ 2919 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 
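  /* accumulate process i's total nonzero count so its column-index and value buffers can be sized below */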
2920 procsnz[i] += rowlengths[j]; 2921 } 2922 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2923 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2924 } 2925 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2926 } else { 2927 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2928 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2929 } 2930 2931 if (!rank) { 2932 /* determine max buffer needed and allocate it */ 2933 maxnz = 0; 2934 for (i=0; i<size; i++) { 2935 maxnz = PetscMax(maxnz,procsnz[i]); 2936 } 2937 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2938 2939 /* read in my part of the matrix column indices */ 2940 nz = procsnz[0]; 2941 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2942 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2943 2944 /* read in every one elses and ship off */ 2945 for (i=1; i<size; i++) { 2946 nz = procsnz[i]; 2947 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2948 mpicnt = PetscMPIIntCast(nz); 2949 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2950 } 2951 ierr = PetscFree(cols);CHKERRQ(ierr); 2952 } else { 2953 /* determine buffer space needed for message */ 2954 nz = 0; 2955 for (i=0; i<m; i++) { 2956 nz += ourlens[i]; 2957 } 2958 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2959 2960 /* receive message of column indices*/ 2961 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2962 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2963 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2964 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 2965 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 2966 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 2967 } 2968 2969 /* determine column ownership if matrix is not square */ 2970 if (N != M) { 2971 n = N/size + ((N % size) > rank); 2972 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2973 cstart = cend - n; 2974 } else { 2975 cstart = rstart; 2976 cend = rend; 2977 n = cend - cstart; 2978 } 2979 2980 /* loop over local rows, determining number of off diagonal entries */ 2981 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2982 jj = 0; 2983 for (i=0; i<m; i++) { 2984 for (j=0; j<ourlens[i]; j++) { 2985 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2986 jj++; 2987 } 2988 } 2989 2990 /* create our matrix */ 2991 for (i=0; i<m; i++) { 2992 ourlens[i] -= offlens[i]; 2993 } 2994 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2995 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2996 ierr = MatSetType(A,type);CHKERRQ(ierr); 2997 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2998 2999 for (i=0; i<m; i++) { 3000 ourlens[i] += offlens[i]; 3001 } 3002 3003 if (!rank) { 3004 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3005 3006 /* read in my part of the matrix numerical values */ 3007 nz = procsnz[0]; 3008 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3009 3010 /* insert into matrix */ 3011 jj = rstart; 3012 smycols = mycols; 3013 svals = vals; 3014 for (i=0; i<m; i++) { 3015 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3016 
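  /* advance to the next row's slice of the index/value buffers; ourlens[i] again counts
     both diagonal and off-diagonal entries since offlens was added back in above */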
smycols += ourlens[i]; 3017 svals += ourlens[i]; 3018 jj++; 3019 } 3020 3021 /* read in other processors and ship out */ 3022 for (i=1; i<size; i++) { 3023 nz = procsnz[i]; 3024 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3025 mpicnt = PetscMPIIntCast(nz); 3026 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3027 } 3028 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3029 } else { 3030 /* receive numeric values */ 3031 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3032 3033 /* receive message of values*/ 3034 mpicnt = PetscMPIIntCast(nz); 3035 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3036 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3037 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3038 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3039 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3040 3041 /* insert into matrix */ 3042 jj = rstart; 3043 smycols = mycols; 3044 svals = vals; 3045 for (i=0; i<m; i++) { 3046 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3047 smycols += ourlens[i]; 3048 svals += ourlens[i]; 3049 jj++; 3050 } 3051 } 3052 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3053 ierr = PetscFree(vals);CHKERRQ(ierr); 3054 ierr = PetscFree(mycols);CHKERRQ(ierr); 3055 ierr = PetscFree(rowners);CHKERRQ(ierr); 3056 3057 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3058 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 *newmat = A; 3060 PetscFunctionReturn(0); 3061 } 3062 3063 #undef __FUNCT__ 3064 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3065 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3066 { 3067 PetscErrorCode ierr; 3068 IS iscol_local; 3069 PetscInt csize; 3070 3071 PetscFunctionBegin; 3072 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3073 if (call == MAT_REUSE_MATRIX) { 3074 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3075 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3076 } else { 3077 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3078 } 3079 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3080 if (call == MAT_INITIAL_MATRIX) { 3081 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3082 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3083 } 3084 PetscFunctionReturn(0); 3085 } 3086 3087 #undef __FUNCT__ 3088 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3089 /* 3090 Not great since it makes two copies of the submatrix, first an SeqAIJ 3091 in local and then by concatenating the local matrices the end result. 3092 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3093 3094 Note: This requires a sequential iscol with all indices. 
3095 */ 3096 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3097 { 3098 PetscErrorCode ierr; 3099 PetscMPIInt rank,size; 3100 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3101 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3102 Mat *local,M,Mreuse; 3103 MatScalar *vwork,*aa; 3104 MPI_Comm comm = ((PetscObject)mat)->comm; 3105 Mat_SeqAIJ *aij; 3106 3107 3108 PetscFunctionBegin; 3109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3111 3112 if (call == MAT_REUSE_MATRIX) { 3113 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3114 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3115 local = &Mreuse; 3116 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3117 } else { 3118 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3119 Mreuse = *local; 3120 ierr = PetscFree(local);CHKERRQ(ierr); 3121 } 3122 3123 /* 3124 m - number of local rows 3125 n - number of columns (same on all processors) 3126 rstart - first row in new global matrix generated 3127 */ 3128 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3129 if (call == MAT_INITIAL_MATRIX) { 3130 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3131 ii = aij->i; 3132 jj = aij->j; 3133 3134 /* 3135 Determine the number of non-zeros in the diagonal and off-diagonal 3136 portions of the matrix in order to do correct preallocation 3137 */ 3138 3139 /* first get start and end of "diagonal" columns */ 3140 if (csize == PETSC_DECIDE) { 3141 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3142 if (mglobal == n) { /* square matrix */ 3143 nlocal = m; 3144 } else { 3145 nlocal = n/size + ((n % size) > rank); 3146 } 3147 } else { 3148 nlocal = csize; 3149 } 3150 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3151 rstart = rend - nlocal; 3152 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3153 3154 /* next, compute all the lengths */ 3155 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3156 olens = dlens + m; 3157 for (i=0; i<m; i++) { 3158 jend = ii[i+1] - ii[i]; 3159 olen = 0; 3160 dlen = 0; 3161 for (j=0; j<jend; j++) { 3162 if (*jj < rstart || *jj >= rend) olen++; 3163 else dlen++; 3164 jj++; 3165 } 3166 olens[i] = olen; 3167 dlens[i] = dlen; 3168 } 3169 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3170 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3171 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3172 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3173 ierr = PetscFree(dlens);CHKERRQ(ierr); 3174 } else { 3175 PetscInt ml,nl; 3176 3177 M = *newmat; 3178 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3179 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3180 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3181 /* 3182 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3183 rather than the slower MatSetValues(). 
3184 */
3185 M->was_assembled = PETSC_TRUE;
3186 M->assembled = PETSC_FALSE;
3187 }
3188 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3189 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3190 ii = aij->i;
3191 jj = aij->j;
3192 aa = aij->a;
3193 for (i=0; i<m; i++) {
3194 row = rstart + i;
3195 nz = ii[i+1] - ii[i];
3196 cwork = jj; jj += nz;
3197 vwork = aa; aa += nz;
3198 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3199 }
3200
3201 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3203 *newmat = M;
3204
3205 /* save submatrix used in processor for next request */
3206 if (call == MAT_INITIAL_MATRIX) {
3207 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3208 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3209 }
3210
3211 PetscFunctionReturn(0);
3212 }
3213
3214 EXTERN_C_BEGIN
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3217 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3218 {
3219 PetscInt m,cstart, cend,j,nnz,i,d;
3220 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3221 const PetscInt *JJ;
3222 PetscScalar *values;
3223 PetscErrorCode ierr;
3224
3225 PetscFunctionBegin;
3226 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3227
3228 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3229 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3230 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3231 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3232 m = B->rmap->n;
3233 cstart = B->cmap->rstart;
3234 cend = B->cmap->rend;
3235 rstart = B->rmap->rstart;
3236
3237 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3238
3239 #if defined(PETSC_USE_DEBUGGING)
3240 for (i=0; i<m; i++) {
3241 nnz = Ii[i+1]- Ii[i];
3242 JJ = J + Ii[i];
3243 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3244 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3245 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3246 }
3247 #endif
3248
3249 for (i=0; i<m; i++) {
3250 nnz = Ii[i+1]- Ii[i];
3251 JJ = J + Ii[i];
3252 nnz_max = PetscMax(nnz_max,nnz);
3253 d = 0;
3254 for (j=0; j<nnz; j++) {
3255 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3256 }
3257 d_nnz[i] = d;
3258 o_nnz[i] = nnz - d;
3259 }
3260 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3261 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3262
3263 if (v) values = (PetscScalar*)v;
3264 else {
3265 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3266 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3267 }
3268
3269 for (i=0; i<m; i++) {
3270 ii = i + rstart;
3271 nnz = Ii[i+1]- Ii[i];
3272 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3273 } 3274 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3275 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3276 3277 if (!v) { 3278 ierr = PetscFree(values);CHKERRQ(ierr); 3279 } 3280 PetscFunctionReturn(0); 3281 } 3282 EXTERN_C_END 3283 3284 #undef __FUNCT__ 3285 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3286 /*@ 3287 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3288 (the default parallel PETSc format). 3289 3290 Collective on MPI_Comm 3291 3292 Input Parameters: 3293 + B - the matrix 3294 . i - the indices into j for the start of each local row (starts with zero) 3295 . j - the column indices for each local row (starts with zero) 3296 - v - optional values in the matrix 3297 3298 Level: developer 3299 3300 Notes: 3301 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3302 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3303 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3304 3305 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3306 3307 The format which is used for the sparse matrix input, is equivalent to a 3308 row-major ordering.. i.e for the following matrix, the input data expected is 3309 as shown: 3310 3311 1 0 0 3312 2 0 3 P0 3313 ------- 3314 4 5 6 P1 3315 3316 Process0 [P0]: rows_owned=[0,1] 3317 i = {0,1,3} [size = nrow+1 = 2+1] 3318 j = {0,0,2} [size = nz = 6] 3319 v = {1,2,3} [size = nz = 6] 3320 3321 Process1 [P1]: rows_owned=[2] 3322 i = {0,3} [size = nrow+1 = 1+1] 3323 j = {0,1,2} [size = nz = 6] 3324 v = {4,5,6} [size = nz = 6] 3325 3326 .keywords: matrix, aij, compressed row, sparse, parallel 3327 3328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3329 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3330 @*/ 3331 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3332 { 3333 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3334 3335 PetscFunctionBegin; 3336 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3337 if (f) { 3338 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3339 } 3340 PetscFunctionReturn(0); 3341 } 3342 3343 #undef __FUNCT__ 3344 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3345 /*@C 3346 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3347 (the default parallel PETSc format). For good matrix assembly performance 3348 the user should preallocate the matrix storage by setting the parameters 3349 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3350 performance can be increased by more than a factor of 50. 3351 3352 Collective on MPI_Comm 3353 3354 Input Parameters: 3355 + A - the matrix 3356 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3357 (same value is used for all local rows) 3358 . d_nnz - array containing the number of nonzeros in the various rows of the 3359 DIAGONAL portion of the local submatrix (possibly different for each row) 3360 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3361 The size of this array is equal to the number of local rows, i.e 'm'. 
3362 You must leave room for the diagonal entry even if it is zero.
3363 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3364 submatrix (same value is used for all local rows).
3365 - o_nnz - array containing the number of nonzeros in the various rows of the
3366 OFF-DIAGONAL portion of the local submatrix (possibly different for
3367 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3368 structure. The size of this array is equal to the number
3369 of local rows, i.e. 'm'.
3370
3371 If the *_nnz parameter is given then the *_nz parameter is ignored.
3372
3373 The AIJ format (also called the Yale sparse matrix format or
3374 compressed row storage (CSR)), is fully compatible with standard Fortran 77
3375 storage. The stored row and column indices begin with zero. See the users manual for details.
3376
3377 The parallel matrix is partitioned such that the first m0 rows belong to
3378 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3379 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3380
3381 The DIAGONAL portion of the local submatrix of a processor can be defined
3382 as the submatrix which is obtained by extracting the part corresponding
3383 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3384 first row that belongs to the processor, and r2 is the last row belonging
3385 to this processor. This is a square mxm matrix. The remaining portion
3386 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3387
3388 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3389
3390 You can call MatGetInfo() to get information on how effective the preallocation was;
3391 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3392 You can also run with the option -info and look for messages with the string
3393 malloc in them to see if additional memory allocation was needed.
3394
3395 Example usage:
3396
3397 Consider the following 8x8 matrix with 34 non-zero values, that is
3398 assembled across 3 processors. Assume that proc0 owns 3 rows,
3399 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3400 as follows:
3401
3402 .vb
3403 1 2 0 | 0 3 0 | 0 4
3404 Proc0 0 5 6 | 7 0 0 | 8 0
3405 9 0 10 | 11 0 0 | 12 0
3406 -------------------------------------
3407 13 0 14 | 15 16 17 | 0 0
3408 Proc1 0 18 0 | 19 20 21 | 0 0
3409 0 0 0 | 22 23 0 | 24 0
3410 -------------------------------------
3411 Proc2 25 26 27 | 0 0 28 | 29 0
3412 30 0 0 | 31 32 33 | 0 34
3413 .ve
3414
3415 This can be represented as a collection of submatrices as:
3416
3417 .vb
3418 A B C
3419 D E F
3420 G H I
3421 .ve
3422
3423 Where the submatrices A,B,C are owned by proc0, D,E,F are
3424 owned by proc1, G,H,I are owned by proc2.
3425
3426 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3427 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3428 The 'M','N' parameters are 8,8, and have the same values on all procs.
3429
3430 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3431 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3432 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3433 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3434 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3435 matrix, and [DF] as another SeqAIJ matrix.
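
   As an illustration only (this sketch is not part of the documented calling sequence;
   the CSR arrays ia[],ja[] and the column ownership range cstart,cend are assumed to be
   held by the caller), the per-row counts for the DIAGONAL and OFF-DIAGONAL portions can
   be computed along the following lines before calling this routine:

.vb
     PetscInt r,k,*d_nnz,*o_nnz;
     ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
     for (r=0; r<m; r++) {
       d_nnz[r] = 0; o_nnz[r] = 0;
       for (k=ia[r]; k<ia[r+1]; k++) {                      /* walk the nonzeros of local row r */
         if (ja[k] >= cstart && ja[k] < cend) d_nnz[r]++;   /* column owned by this process     */
         else                                 o_nnz[r]++;   /* column owned by another process  */
       }
     }
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
.ve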
3436
3437 When d_nz, o_nz parameters are specified, d_nz storage elements are
3438 allocated for every row of the local diagonal submatrix, and o_nz
3439 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3440 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3441 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3442 In this case, the values of d_nz,o_nz are:
3443 .vb
3444 proc0 : d_nz = 2, o_nz = 2
3445 proc1 : d_nz = 3, o_nz = 2
3446 proc2 : d_nz = 1, o_nz = 4
3447 .ve
3448 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3449 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3450 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3451 34 values.
3452
3453 When d_nnz, o_nnz parameters are specified, the storage is specified
3454 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3455 In the above case the values for d_nnz,o_nnz are:
3456 .vb
3457 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3458 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3459 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3460 .ve
3461 Here the space allocated is the sum of all the above values, i.e. 34, and
3462 hence the preallocation is perfect.
3463
3464 Level: intermediate
3465
3466 .keywords: matrix, aij, compressed row, sparse, parallel
3467
3468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3469 MPIAIJ, MatGetInfo()
3470 @*/
3471 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3472 {
3473 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3474
3475 PetscFunctionBegin;
3476 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3477 if (f) {
3478 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3479 }
3480 PetscFunctionReturn(0);
3481 }
3482
3483 #undef __FUNCT__
3484 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3485 /*@
3486 MatCreateMPIAIJWithArrays - Creates an MPI AIJ matrix using arrays that contain the
3487 local rows in standard CSR format.
3488
3489 Collective on MPI_Comm
3490
3491 Input Parameters:
3492 + comm - MPI communicator
3493 . m - number of local rows (cannot be PETSC_DECIDE)
3494 . n - This value should be the same as the local size used in creating the
3495 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3496 calculated if N is given) For square matrices n is almost always m.
3497 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3498 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3499 . i - row indices
3500 . j - column indices
3501 - a - matrix values
3502
3503 Output Parameter:
3504 . mat - the matrix
3505
3506 Level: intermediate
3507
3508 Notes:
3509 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3510 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3511 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3512
3513 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3514
3515 The format which is used for the sparse matrix input is equivalent to a
3516 row-major ordering,
i.e for the following matrix, the input data expected is 3517 as shown: 3518 3519 1 0 0 3520 2 0 3 P0 3521 ------- 3522 4 5 6 P1 3523 3524 Process0 [P0]: rows_owned=[0,1] 3525 i = {0,1,3} [size = nrow+1 = 2+1] 3526 j = {0,0,2} [size = nz = 6] 3527 v = {1,2,3} [size = nz = 6] 3528 3529 Process1 [P1]: rows_owned=[2] 3530 i = {0,3} [size = nrow+1 = 1+1] 3531 j = {0,1,2} [size = nz = 6] 3532 v = {4,5,6} [size = nz = 6] 3533 3534 .keywords: matrix, aij, compressed row, sparse, parallel 3535 3536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3537 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3538 @*/ 3539 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3540 { 3541 PetscErrorCode ierr; 3542 3543 PetscFunctionBegin; 3544 if (i[0]) { 3545 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3546 } 3547 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3548 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3549 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3550 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3551 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3552 PetscFunctionReturn(0); 3553 } 3554 3555 #undef __FUNCT__ 3556 #define __FUNCT__ "MatCreateMPIAIJ" 3557 /*@C 3558 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3559 (the default parallel PETSc format). For good matrix assembly performance 3560 the user should preallocate the matrix storage by setting the parameters 3561 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3562 performance can be increased by more than a factor of 50. 3563 3564 Collective on MPI_Comm 3565 3566 Input Parameters: 3567 + comm - MPI communicator 3568 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3569 This value should be the same as the local size used in creating the 3570 y vector for the matrix-vector product y = Ax. 3571 . n - This value should be the same as the local size used in creating the 3572 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3573 calculated if N is given) For square matrices n is almost always m. 3574 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3575 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3576 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3577 (same value is used for all local rows) 3578 . d_nnz - array containing the number of nonzeros in the various rows of the 3579 DIAGONAL portion of the local submatrix (possibly different for each row) 3580 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3581 The size of this array is equal to the number of local rows, i.e 'm'. 3582 You must leave room for the diagonal entry even if it is zero. 3583 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3584 submatrix (same value is used for all local rows). 3585 - o_nnz - array containing the number of nonzeros in the various rows of the 3586 OFF-DIAGONAL portion of the local submatrix (possibly different for 3587 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3588 structure. The size of this array is equal to the number 3589 of local rows, i.e 'm'. 
3590
3591 Output Parameter:
3592 . A - the matrix
3593
3594 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3595 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3596 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3597
3598 Notes:
3599 If the *_nnz parameter is given then the *_nz parameter is ignored.
3600
3601 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
3602 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3603 storage requirements for this matrix.
3604
3605 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3606 processor then it must be used on all processors that share the object for
3607 that argument.
3608
3609 The user MUST specify either the local or global matrix dimensions
3610 (possibly both).
3611
3612 The parallel matrix is partitioned across processors such that the
3613 first m0 rows belong to process 0, the next m1 rows belong to
3614 process 1, the next m2 rows belong to process 2 etc., where
3615 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3616 values corresponding to an [m x N] submatrix.
3617
3618 The columns are logically partitioned with the n0 columns belonging
3619 to the 0th partition, the next n1 columns belonging to the next
3620 partition etc., where n0,n1,n2... are the input parameter 'n'.
3621
3622 The DIAGONAL portion of the local submatrix on any given processor
3623 is the submatrix corresponding to the rows and columns m,n
3624 corresponding to the given processor, i.e. the diagonal matrix on
3625 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
3626 etc. The remaining portion of the local submatrix [m x (N-n)]
3627 constitutes the OFF-DIAGONAL portion. The example below better
3628 illustrates this concept.
3629
3630 For a square global matrix we define each processor's diagonal portion
3631 to be its local rows and the corresponding columns (a square submatrix);
3632 each processor's off-diagonal portion encompasses the remainder of the
3633 local matrix (a rectangular submatrix).
3634
3635 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3636
3637 When calling this routine with a single process communicator, a matrix of
3638 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3639 type of communicator, use the construction mechanism:
3640 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3641
3642 By default, this format uses inodes (identical nodes) when possible.
3643 We search for consecutive rows with the same nonzero structure, thereby
3644 reusing matrix information to achieve increased efficiency.
3645
3646 Options Database Keys:
3647 + -mat_no_inode - Do not use inodes
3648 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3649 - -mat_aij_oneindex - Internally use indexing starting at 1
3650 rather than 0. Note that when calling MatSetValues(),
3651 the user still MUST index entries starting at 0!
3652
3653
3654 Example usage:
3655
3656 Consider the following 8x8 matrix with 34 non-zero values, that is
3657 assembled across 3 processors. Assume that proc0 owns 3 rows,
3658 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
3659 as follows:
3660
3661 .vb
3662 1 2 0 | 0 3 0 | 0 4
3663 Proc0 0 5 6 | 7 0 0 | 8 0
3664 9 0 10 | 11 0 0 | 12 0
3665 -------------------------------------
3666 13 0 14 | 15 16 17 | 0 0
3667 Proc1 0 18 0 | 19 20 21 | 0 0
3668 0 0 0 | 22 23 0 | 24 0
3669 -------------------------------------
3670 Proc2 25 26 27 | 0 0 28 | 29 0
3671 30 0 0 | 31 32 33 | 0 34
3672 .ve
3673
3674 This can be represented as a collection of submatrices as:
3675
3676 .vb
3677 A B C
3678 D E F
3679 G H I
3680 .ve
3681
3682 Where the submatrices A,B,C are owned by proc0, D,E,F are
3683 owned by proc1, G,H,I are owned by proc2.
3684
3685 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3686 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3687 The 'M','N' parameters are 8,8, and have the same values on all procs.
3688
3689 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3690 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3691 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3692 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3693 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3694 matrix, and [DF] as another SeqAIJ matrix.
3695
3696 When d_nz, o_nz parameters are specified, d_nz storage elements are
3697 allocated for every row of the local diagonal submatrix, and o_nz
3698 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3699 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3700 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3701 In this case, the values of d_nz,o_nz are:
3702 .vb
3703 proc0 : d_nz = 2, o_nz = 2
3704 proc1 : d_nz = 3, o_nz = 2
3705 proc2 : d_nz = 1, o_nz = 4
3706 .ve
3707 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3708 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3709 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3710 34 values.
3711
3712 When d_nnz, o_nnz parameters are specified, the storage is specified
3713 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3714 In the above case the values for d_nnz,o_nnz are:
3715 .vb
3716 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3717 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3718 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3719 .ve
3720 Here the space allocated is the sum of all the above values, i.e. 34, and
3721 hence the preallocation is perfect. A short calling sketch follows.
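
   As a usage sketch only (SPMD style; the literal sizes are those of proc0 in the example
   above, and the arrays d_nnz,o_nnz as well as row,ncols,cols,vals are placeholders the
   caller is assumed to provide):

.vb
     Mat A;
     /* on proc0 of the example: m=3, n=3, M=8, N=8 */
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* insert the locally owned entries, one row at a time */
     ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve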
3722 3723 Level: intermediate 3724 3725 .keywords: matrix, aij, compressed row, sparse, parallel 3726 3727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3728 MPIAIJ, MatCreateMPIAIJWithArrays() 3729 @*/ 3730 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3731 { 3732 PetscErrorCode ierr; 3733 PetscMPIInt size; 3734 3735 PetscFunctionBegin; 3736 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3737 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3738 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3739 if (size > 1) { 3740 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3741 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3742 } else { 3743 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3744 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3745 } 3746 PetscFunctionReturn(0); 3747 } 3748 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3752 { 3753 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3754 3755 PetscFunctionBegin; 3756 *Ad = a->A; 3757 *Ao = a->B; 3758 *colmap = a->garray; 3759 PetscFunctionReturn(0); 3760 } 3761 3762 #undef __FUNCT__ 3763 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3764 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3765 { 3766 PetscErrorCode ierr; 3767 PetscInt i; 3768 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3769 3770 PetscFunctionBegin; 3771 if (coloring->ctype == IS_COLORING_GLOBAL) { 3772 ISColoringValue *allcolors,*colors; 3773 ISColoring ocoloring; 3774 3775 /* set coloring for diagonal portion */ 3776 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3777 3778 /* set coloring for off-diagonal portion */ 3779 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3780 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3781 for (i=0; i<a->B->cmap->n; i++) { 3782 colors[i] = allcolors[a->garray[i]]; 3783 } 3784 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3785 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3786 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3787 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3788 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3789 ISColoringValue *colors; 3790 PetscInt *larray; 3791 ISColoring ocoloring; 3792 3793 /* set coloring for diagonal portion */ 3794 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3795 for (i=0; i<a->A->cmap->n; i++) { 3796 larray[i] = i + A->cmap->rstart; 3797 } 3798 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3799 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3800 for (i=0; i<a->A->cmap->n; i++) { 3801 colors[i] = coloring->colors[larray[i]]; 3802 } 3803 ierr = PetscFree(larray);CHKERRQ(ierr); 3804 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3805 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3806 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3807 3808 /* set coloring for off-diagonal portion */ 3809 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3810 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3811 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3812 for (i=0; i<a->B->cmap->n; i++) {
3813 colors[i] = coloring->colors[larray[i]];
3814 }
3815 ierr = PetscFree(larray);CHKERRQ(ierr);
3816 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3817 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3818 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3819 } else {
3820 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3821 }
3822
3823 PetscFunctionReturn(0);
3824 }
3825
3826 #if defined(PETSC_HAVE_ADIC)
3827 #undef __FUNCT__
3828 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3829 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3830 {
3831 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3832 PetscErrorCode ierr;
3833
3834 PetscFunctionBegin;
3835 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3836 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3837 PetscFunctionReturn(0);
3838 }
3839 #endif
3840
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3843 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3844 {
3845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3846 PetscErrorCode ierr;
3847
3848 PetscFunctionBegin;
3849 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3850 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3851 PetscFunctionReturn(0);
3852 }
3853
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatMerge"
3856 /*@
3857 MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3858 matrices supplied by each processor
3859
3860 Collective on MPI_Comm
3861
3862 Input Parameters:
3863 + comm - the communicator the parallel matrix will live on
3864 . inmat - the input sequential matrix (one per process)
3865 . n - number of local columns (or PETSC_DECIDE)
3866 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3867
3868 Output Parameter:
3869 . outmat - the parallel matrix generated
3870
3871 Level: advanced
3872
3873 Notes: The number of columns of the matrix in EACH processor MUST be the same.
   The input matrix inmat is destroyed by this routine; a minimal calling sketch is shown below.
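
   A minimal calling sketch (names are illustrative; each process contributes its own
   sequential block of rows):

.vb
     Mat seq,par;
     /* ... each process assembles a SeqAIJ matrix seq holding its rows ... */
     ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     /* seq has been destroyed by MatMerge(); par is the assembled MPIAIJ matrix */
.ve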
3874 3875 @*/ 3876 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3877 { 3878 PetscErrorCode ierr; 3879 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3880 PetscInt *indx; 3881 PetscScalar *values; 3882 3883 PetscFunctionBegin; 3884 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3885 if (scall == MAT_INITIAL_MATRIX){ 3886 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3887 if (n == PETSC_DECIDE){ 3888 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3889 } 3890 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3891 rstart -= m; 3892 3893 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3894 for (i=0;i<m;i++) { 3895 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3896 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3897 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3898 } 3899 /* This routine will ONLY return MPIAIJ type matrix */ 3900 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3901 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3902 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3903 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3904 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3905 3906 } else if (scall == MAT_REUSE_MATRIX){ 3907 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3908 } else { 3909 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3910 } 3911 3912 for (i=0;i<m;i++) { 3913 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3914 Ii = i + rstart; 3915 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3916 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3917 } 3918 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3919 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3920 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3921 3922 PetscFunctionReturn(0); 3923 } 3924 3925 #undef __FUNCT__ 3926 #define __FUNCT__ "MatFileSplit" 3927 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3928 { 3929 PetscErrorCode ierr; 3930 PetscMPIInt rank; 3931 PetscInt m,N,i,rstart,nnz; 3932 size_t len; 3933 const PetscInt *indx; 3934 PetscViewer out; 3935 char *name; 3936 Mat B; 3937 const PetscScalar *values; 3938 3939 PetscFunctionBegin; 3940 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3941 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3942 /* Should this be the type of the diagonal block of A? 
*/ 3943 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3944 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3945 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3946 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3947 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3948 for (i=0;i<m;i++) { 3949 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3950 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3951 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3952 } 3953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3955 3956 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3957 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3958 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3959 sprintf(name,"%s.%d",outfile,rank); 3960 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3961 ierr = PetscFree(name); 3962 ierr = MatView(B,out);CHKERRQ(ierr); 3963 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3964 ierr = MatDestroy(B);CHKERRQ(ierr); 3965 PetscFunctionReturn(0); 3966 } 3967 3968 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3971 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3972 { 3973 PetscErrorCode ierr; 3974 Mat_Merge_SeqsToMPI *merge; 3975 PetscContainer container; 3976 3977 PetscFunctionBegin; 3978 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3979 if (container) { 3980 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3981 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3982 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3983 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3984 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3985 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3986 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3987 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3988 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3989 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3990 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3993 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 3994 3995 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3996 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3997 } 3998 ierr = PetscFree(merge);CHKERRQ(ierr); 3999 4000 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4001 PetscFunctionReturn(0); 4002 } 4003 4004 #include "../src/mat/utils/freespace.h" 4005 #include "petscbt.h" 4006 4007 #undef __FUNCT__ 4008 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4009 /*@C 4010 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4011 matrices from each processor 4012 4013 Collective on MPI_Comm 4014 4015 Input Parameters: 4016 + comm - the communicators the parallel matrix will live on 4017 . seqmat - the input sequential matrices 4018 . m - number of local rows (or PETSC_DECIDE) 4019 . n - number of local columns (or PETSC_DECIDE) 4020 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4021 4022 Output Parameter: 4023 . mpimat - the parallel matrix generated 4024 4025 Level: advanced 4026 4027 Notes: 4028 The dimensions of the sequential matrix in each processor MUST be the same. 
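
   An illustrative calling pattern, via the driver MatMerge_SeqsToMPI() defined later in
   this file (a sketch only; seqmat and the local sizes m,n are assumed to be set up by the
   caller):

.vb
     Mat seqmat,mpimat;
     /* first call: the symbolic structure is built, then the values are merged */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* subsequent merges with the same nonzero pattern reuse that structure */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve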
4029 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4030 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4031 @*/ 4032 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4033 { 4034 PetscErrorCode ierr; 4035 MPI_Comm comm=((PetscObject)mpimat)->comm; 4036 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4037 PetscMPIInt size,rank,taga,*len_s; 4038 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4039 PetscInt proc,m; 4040 PetscInt **buf_ri,**buf_rj; 4041 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4042 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4043 MPI_Request *s_waits,*r_waits; 4044 MPI_Status *status; 4045 MatScalar *aa=a->a; 4046 MatScalar **abuf_r,*ba_i; 4047 Mat_Merge_SeqsToMPI *merge; 4048 PetscContainer container; 4049 4050 PetscFunctionBegin; 4051 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4052 4053 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4054 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4055 4056 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4057 if (container) { 4058 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4059 } 4060 bi = merge->bi; 4061 bj = merge->bj; 4062 buf_ri = merge->buf_ri; 4063 buf_rj = merge->buf_rj; 4064 4065 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4066 owners = merge->rowmap->range; 4067 len_s = merge->len_s; 4068 4069 /* send and recv matrix values */ 4070 /*-----------------------------*/ 4071 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4072 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4073 4074 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4075 for (proc=0,k=0; proc<size; proc++){ 4076 if (!len_s[proc]) continue; 4077 i = owners[proc]; 4078 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4079 k++; 4080 } 4081 4082 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4083 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4084 ierr = PetscFree(status);CHKERRQ(ierr); 4085 4086 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4087 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4088 4089 /* insert mat values of mpimat */ 4090 /*----------------------------*/ 4091 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4092 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4093 4094 for (k=0; k<merge->nrecv; k++){ 4095 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4096 nrows = *(buf_ri_k[k]); 4097 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4098 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4099 } 4100 4101 /* set values of ba */ 4102 m = merge->rowmap->n; 4103 for (i=0; i<m; i++) { 4104 arow = owners[rank] + i; 4105 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4106 bnzi = bi[i+1] - bi[i]; 4107 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4108 4109 /* add local non-zero vals of this proc's seqmat into ba */ 4110 anzi = ai[arow+1] - ai[arow]; 4111 aj = a->j + ai[arow]; 4112 aa = a->a + ai[arow]; 4113 nextaj = 0; 4114 for (j=0; nextaj<anzi; j++){ 4115 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4116 ba_i[j] += aa[nextaj++]; 4117 } 4118 } 4119 4120 /* add received vals into ba */ 4121 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4122 /* i-th row */ 4123 if (i == *nextrow[k]) { 4124 anzi = *(nextai[k]+1) - *nextai[k]; 4125 aj = buf_rj[k] + *(nextai[k]); 4126 aa = abuf_r[k] + *(nextai[k]); 4127 nextaj = 0; 4128 for (j=0; nextaj<anzi; j++){ 4129 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4130 ba_i[j] += aa[nextaj++]; 4131 } 4132 } 4133 nextrow[k]++; nextai[k]++; 4134 } 4135 } 4136 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4137 } 4138 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4139 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4140 4141 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4142 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4143 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4144 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4145 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4146 PetscFunctionReturn(0); 4147 } 4148 4149 #undef __FUNCT__ 4150 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4151 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4152 { 4153 PetscErrorCode ierr; 4154 Mat B_mpi; 4155 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4156 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4157 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4158 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4159 PetscInt len,proc,*dnz,*onz; 4160 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4161 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4162 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4163 MPI_Status *status; 4164 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4165 PetscBT lnkbt; 4166 Mat_Merge_SeqsToMPI *merge; 4167 PetscContainer container; 4168 4169 PetscFunctionBegin; 4170 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4171 4172 /* make sure it is a PETSc comm */ 4173 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4175 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4176 4177 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4178 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4179 4180 /* determine row ownership */ 4181 /*---------------------------------------------------------*/ 4182 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4183 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4184 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4185 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4186 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4187 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4188 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4189 4190 m = merge->rowmap->n; 4191 M = merge->rowmap->N; 4192 owners = merge->rowmap->range; 4193 4194 /* determine the number of messages to send, their lengths */ 4195 /*---------------------------------------------------------*/ 4196 len_s = merge->len_s; 4197 4198 len = 0; /* length of buf_si[] */ 4199 merge->nsend = 0; 4200 for (proc=0; proc<size; proc++){ 4201 len_si[proc] = 0; 4202 if (proc == rank){ 4203 len_s[proc] = 0; 4204 } else { 4205 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4206 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4207 } 4208 if (len_s[proc]) { 4209 merge->nsend++; 4210 nrows = 0; 4211 for (i=owners[proc]; i<owners[proc+1]; i++){ 4212 if (ai[i+1] > ai[i]) nrows++; 4213 } 4214 len_si[proc] = 2*(nrows+1); 4215 len += len_si[proc]; 4216 } 4217 } 4218 4219 /* determine the number and length of messages to receive for ij-structure */ 4220 /*-------------------------------------------------------------------------*/ 4221 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4222 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4223 4224 /* post the Irecv of j-structure */ 4225 /*-------------------------------*/ 4226 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4227 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4228 4229 /* post the Isend of j-structure */ 4230 /*--------------------------------*/ 4231 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4232 4233 for (proc=0, k=0; proc<size; proc++){ 4234 if (!len_s[proc]) continue; 4235 i = owners[proc]; 4236 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4237 k++; 4238 } 4239 4240 /* receives and sends of j-structure are complete */ 4241 /*------------------------------------------------*/ 4242 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4243 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4244 4245 /* send and recv i-structure */ 4246 /*---------------------------*/ 4247 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4248 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4249 4250 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4251 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4252 for (proc=0,k=0; proc<size; proc++){ 4253 if (!len_s[proc]) continue; 4254 /* form outgoing message for i-structure: 4255 buf_si[0]: nrows to be sent 4256 [1:nrows]: row index (global) 4257 [nrows+1:2*nrows+1]: i-structure index 4258 */ 4259 /*-------------------------------------------*/ 4260 nrows = len_si[proc]/2 - 1; 4261 buf_si_i = buf_si + nrows+1; 4262 buf_si[0] = nrows; 4263 buf_si_i[0] = 0; 4264 nrows = 0; 4265 for (i=owners[proc]; i<owners[proc+1]; i++){ 4266 anzi = ai[i+1] - ai[i]; 4267 if (anzi) { 4268 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4269 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4270 nrows++; 4271 } 4272 } 4273 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4274 k++; 4275 buf_si += len_si[proc]; 4276 } 4277 4278 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4279 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4280 4281 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4282 for (i=0; i<merge->nrecv; i++){ 4283 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4284 } 4285 4286 ierr = PetscFree(len_si);CHKERRQ(ierr); 4287 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4288 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4289 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4290 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4291 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4292 ierr = PetscFree(status);CHKERRQ(ierr); 4293 4294 /* compute a local seq matrix in each processor */ 4295 /*----------------------------------------------*/ 4296 /* allocate bi array and free space for accumulating nonzero column info */ 4297 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4298 bi[0] = 0; 4299 4300 /* create and initialize a linked list */ 4301 nlnk = N+1; 4302 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4303 4304 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4305 len = 0; 4306 len = ai[owners[rank+1]] - ai[owners[rank]]; 4307 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4308 current_space = free_space; 4309 4310 /* determine symbolic info for each local row */ 4311 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4312 4313 for (k=0; k<merge->nrecv; k++){ 4314 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4315 nrows = *buf_ri_k[k]; 4316 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4317 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4318 } 4319 4320 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4321 len = 0; 4322 for (i=0;i<m;i++) { 4323 bnzi = 0; 4324 /* add local non-zero cols of this proc's seqmat into lnk */ 4325 arow = owners[rank] + i; 4326 anzi = ai[arow+1] - ai[arow]; 4327 aj = a->j + ai[arow]; 4328 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4329 bnzi += nlnk; 4330 /* add received col data into lnk */ 4331 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4332 if (i == *nextrow[k]) { /* i-th row */ 4333 anzi = *(nextai[k]+1) - *nextai[k]; 4334 aj = buf_rj[k] + *nextai[k]; 4335 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4336 bnzi += nlnk; 4337 nextrow[k]++; nextai[k]++; 4338 } 4339 } 4340 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4341 4342 /* if free space is not available, make more free space */ 4343 if (current_space->local_remaining<bnzi) { 4344 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4345 nspacedouble++; 4346 } 4347 /* copy data into free space, then initialize lnk */ 4348 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4349 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4350 4351 current_space->array += bnzi; 4352 current_space->local_used += bnzi; 4353 current_space->local_remaining -= bnzi; 4354 4355 bi[i+1] = bi[i] + bnzi; 4356 } 4357 4358 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4359 4360 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4361 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4362 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4363 4364 /* create symbolic parallel matrix B_mpi */ 4365 /*---------------------------------------*/ 4366 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4367 if (n==PETSC_DECIDE) { 4368 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4369 } else { 4370 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4371 } 4372 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4373 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4374 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4375 4376 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4377 B_mpi->assembled = PETSC_FALSE; 4378 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4379 merge->bi = bi; 4380 merge->bj = bj; 4381 merge->buf_ri = buf_ri; 4382 merge->buf_rj = buf_rj; 4383 merge->coi = PETSC_NULL; 4384 merge->coj = PETSC_NULL; 4385 merge->owners_co = PETSC_NULL; 4386 4387 /* attach the supporting struct to B_mpi for reuse */ 4388 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4389 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4390 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4391 *mpimat = B_mpi; 4392 4393 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4394 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4395 PetscFunctionReturn(0); 4396 } 4397 4398 #undef __FUNCT__ 4399 #define __FUNCT__ "MatMerge_SeqsToMPI" 4400 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4401 { 4402 PetscErrorCode ierr; 4403 4404 PetscFunctionBegin; 4405 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4406 if (scall == MAT_INITIAL_MATRIX){ 4407 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4408 } 4409 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4410 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4411 PetscFunctionReturn(0); 4412 } 4413 4414 #undef __FUNCT__ 4415 #define __FUNCT__ "MatGetLocalMat" 4416 /*@ 4417 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4418 4419 Not Collective 4420 4421 Input Parameters: 4422 + A - the matrix 4423 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4424 4425 Output Parameter: 4426 . 
A_loc - the local sequential matrix generated 4427 4428 Level: developer 4429 4430 @*/ 4431 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4432 { 4433 PetscErrorCode ierr; 4434 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4435 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4436 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4437 MatScalar *aa=a->a,*ba=b->a,*cam; 4438 PetscScalar *ca; 4439 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4440 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4441 4442 PetscFunctionBegin; 4443 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4444 if (scall == MAT_INITIAL_MATRIX){ 4445 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4446 ci[0] = 0; 4447 for (i=0; i<am; i++){ 4448 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4449 } 4450 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4451 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4452 k = 0; 4453 for (i=0; i<am; i++) { 4454 ncols_o = bi[i+1] - bi[i]; 4455 ncols_d = ai[i+1] - ai[i]; 4456 /* off-diagonal portion of A */ 4457 for (jo=0; jo<ncols_o; jo++) { 4458 col = cmap[*bj]; 4459 if (col >= cstart) break; 4460 cj[k] = col; bj++; 4461 ca[k++] = *ba++; 4462 } 4463 /* diagonal portion of A */ 4464 for (j=0; j<ncols_d; j++) { 4465 cj[k] = cstart + *aj++; 4466 ca[k++] = *aa++; 4467 } 4468 /* off-diagonal portion of A */ 4469 for (j=jo; j<ncols_o; j++) { 4470 cj[k] = cmap[*bj++]; 4471 ca[k++] = *ba++; 4472 } 4473 } 4474 /* put together the new matrix */ 4475 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4476 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4477 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4478 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4479 mat->free_a = PETSC_TRUE; 4480 mat->free_ij = PETSC_TRUE; 4481 mat->nonew = 0; 4482 } else if (scall == MAT_REUSE_MATRIX){ 4483 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4484 ci = mat->i; cj = mat->j; cam = mat->a; 4485 for (i=0; i<am; i++) { 4486 /* off-diagonal portion of A */ 4487 ncols_o = bi[i+1] - bi[i]; 4488 for (jo=0; jo<ncols_o; jo++) { 4489 col = cmap[*bj]; 4490 if (col >= cstart) break; 4491 *cam++ = *ba++; bj++; 4492 } 4493 /* diagonal portion of A */ 4494 ncols_d = ai[i+1] - ai[i]; 4495 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4496 /* off-diagonal portion of A */ 4497 for (j=jo; j<ncols_o; j++) { 4498 *cam++ = *ba++; bj++; 4499 } 4500 } 4501 } else { 4502 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4503 } 4504 4505 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4506 PetscFunctionReturn(0); 4507 } 4508 4509 #undef __FUNCT__ 4510 #define __FUNCT__ "MatGetLocalMatCondensed" 4511 /*@C 4512 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4513 4514 Not Collective 4515 4516 Input Parameters: 4517 + A - the matrix 4518 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4519 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4520 4521 Output Parameter: 4522 . 
A_loc - the local sequential matrix generated 4523 4524 Level: developer 4525 4526 @*/ 4527 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4528 { 4529 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4530 PetscErrorCode ierr; 4531 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4532 IS isrowa,iscola; 4533 Mat *aloc; 4534 4535 PetscFunctionBegin; 4536 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4537 if (!row){ 4538 start = A->rmap->rstart; end = A->rmap->rend; 4539 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4540 } else { 4541 isrowa = *row; 4542 } 4543 if (!col){ 4544 start = A->cmap->rstart; 4545 cmap = a->garray; 4546 nzA = a->A->cmap->n; 4547 nzB = a->B->cmap->n; 4548 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4549 ncols = 0; 4550 for (i=0; i<nzB; i++) { 4551 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4552 else break; 4553 } 4554 imark = i; 4555 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4556 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4557 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4558 ierr = PetscFree(idx);CHKERRQ(ierr); 4559 } else { 4560 iscola = *col; 4561 } 4562 if (scall != MAT_INITIAL_MATRIX){ 4563 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4564 aloc[0] = *A_loc; 4565 } 4566 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4567 *A_loc = aloc[0]; 4568 ierr = PetscFree(aloc);CHKERRQ(ierr); 4569 if (!row){ 4570 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4571 } 4572 if (!col){ 4573 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4574 } 4575 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 #undef __FUNCT__ 4580 #define __FUNCT__ "MatGetBrowsOfAcols" 4581 /*@C 4582 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 4583 4584 Collective on Mat 4585 4586 Input Parameters: 4587 + A,B - the matrices in mpiaij format 4588 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4589 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4590 4591 Output Parameter: 4592 + rowb, colb - index sets of rows and columns of B to extract 4593 . 
brstart - row index of B_seq at which the next B->rmap->n rows, taken from B's local rows, begin 4594 - B_seq - the sequential matrix generated 4595 4596 Level: developer 4597 4598 @*/ 4599 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4600 { 4601 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4602 PetscErrorCode ierr; 4603 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4604 IS isrowb,iscolb; 4605 Mat *bseq; 4606 4607 PetscFunctionBegin; 4608 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4609 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4610 } 4611 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4612 4613 if (scall == MAT_INITIAL_MATRIX){ 4614 start = A->cmap->rstart; 4615 cmap = a->garray; 4616 nzA = a->A->cmap->n; 4617 nzB = a->B->cmap->n; 4618 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4619 ncols = 0; 4620 for (i=0; i<nzB; i++) { /* row < local row index */ 4621 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4622 else break; 4623 } 4624 imark = i; 4625 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4626 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4627 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4628 ierr = PetscFree(idx);CHKERRQ(ierr); 4629 *brstart = imark; 4630 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4631 } else { 4632 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4633 isrowb = *rowb; iscolb = *colb; 4634 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4635 bseq[0] = *B_seq; 4636 } 4637 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4638 *B_seq = bseq[0]; 4639 ierr = PetscFree(bseq);CHKERRQ(ierr); 4640 if (!rowb){ 4641 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4642 } else { 4643 *rowb = isrowb; 4644 } 4645 if (!colb){ 4646 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4647 } else { 4648 *colb = iscolb; 4649 } 4650 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4651 PetscFunctionReturn(0); 4652 } 4653 4654 #undef __FUNCT__ 4655 #define __FUNCT__ "MatGetBrowsOfAoCols" 4656 /*@C 4657 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4658 of the OFF-DIAGONAL portion of local A 4659 4660 Collective on Mat 4661 4662 Input Parameters: 4663 + A,B - the matrices in mpiaij format 4664 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4665 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4666 . 
startsj_r - similar to startsj for receives 4667 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4668 4669 Output Parameter: 4670 + B_oth - the sequential matrix generated 4671 4672 Level: developer 4673 4674 @*/ 4675 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4676 { 4677 VecScatter_MPI_General *gen_to,*gen_from; 4678 PetscErrorCode ierr; 4679 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4680 Mat_SeqAIJ *b_oth; 4681 VecScatter ctx=a->Mvctx; 4682 MPI_Comm comm=((PetscObject)ctx)->comm; 4683 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4684 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4685 PetscScalar *rvalues,*svalues; 4686 MatScalar *b_otha,*bufa,*bufA; 4687 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4688 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4689 MPI_Status *sstatus,rstatus; 4690 PetscMPIInt jj; 4691 PetscInt *cols,sbs,rbs; 4692 PetscScalar *vals; 4693 4694 PetscFunctionBegin; 4695 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4696 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4697 } 4698 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4699 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4700 4701 gen_to = (VecScatter_MPI_General*)ctx->todata; 4702 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4703 rvalues = gen_from->values; /* holds the length of receiving row */ 4704 svalues = gen_to->values; /* holds the length of sending row */ 4705 nrecvs = gen_from->n; 4706 nsends = gen_to->n; 4707 4708 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4709 srow = gen_to->indices; /* local row index to be sent */ 4710 sstarts = gen_to->starts; 4711 sprocs = gen_to->procs; 4712 sstatus = gen_to->sstatus; 4713 sbs = gen_to->bs; 4714 rstarts = gen_from->starts; 4715 rprocs = gen_from->procs; 4716 rbs = gen_from->bs; 4717 4718 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4719 if (scall == MAT_INITIAL_MATRIX){ 4720 /* i-array */ 4721 /*---------*/ 4722 /* post receives */ 4723 for (i=0; i<nrecvs; i++){ 4724 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4725 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4726 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4727 } 4728 4729 /* pack the outgoing message */ 4730 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4731 sstartsj[0] = 0; rstartsj[0] = 0; 4732 len = 0; /* total length of j or a array to be sent */ 4733 k = 0; 4734 for (i=0; i<nsends; i++){ 4735 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4736 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4737 for (j=0; j<nrows; j++) { 4738 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4739 for (l=0; l<sbs; l++){ 4740 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4741 rowlen[j*sbs+l] = ncols; 4742 len += ncols; 4743 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4744 } 4745 k++; 4746 } 4747 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4748 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4749 } 4750 /* recvs and sends of i-array are completed */ 4751 i = nrecvs; 4752 while (i--) { 4753 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4754 } 4755 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4756 4757 /* allocate buffers for sending j and a arrays */ 4758 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4759 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4760 4761 /* create i-array of B_oth */ 4762 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4763 b_othi[0] = 0; 4764 len = 0; /* total length of j or a array to be received */ 4765 k = 0; 4766 for (i=0; i<nrecvs; i++){ 4767 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4768 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4769 for (j=0; j<nrows; j++) { 4770 b_othi[k+1] = b_othi[k] + rowlen[j]; 4771 len += rowlen[j]; k++; 4772 } 4773 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4774 } 4775 4776 /* allocate space for j and a arrays of B_oth */ 4777 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4778 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4779 4780 /* j-array */ 4781 /*---------*/ 4782 /* post receives of j-array */ 4783 for (i=0; i<nrecvs; i++){ 4784 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4785 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4786 } 4787 4788 /* pack the outgoing message j-array */ 4789 k = 0; 4790 for (i=0; i<nsends; i++){ 4791 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4792 bufJ = bufj+sstartsj[i]; 4793 for (j=0; j<nrows; j++) { 4794 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4795 for (ll=0; ll<sbs; ll++){ 4796 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4797 for (l=0; l<ncols; l++){ 4798 *bufJ++ = cols[l]; 4799 } 4800 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4801 } 4802 } 4803 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4804 } 4805 4806 /* recvs and sends of j-array are completed */ 4807 i = nrecvs; 4808 while (i--) { 4809 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4810 } 4811 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4812 } else if (scall == MAT_REUSE_MATRIX){ 4813 sstartsj = *startsj; 4814 rstartsj = *startsj_r; 4815 bufa = *bufa_ptr; 4816 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4817 b_otha = b_oth->a; 4818 } else { 4819 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 4820 } 4821 4822 /* a-array */ 4823 /*---------*/ 4824 /* post receives of a-array */ 4825 for (i=0; i<nrecvs; i++){ 4826 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4827 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4828 } 4829 4830 /* pack the outgoing message a-array */ 4831 k = 0; 4832 for (i=0; i<nsends; i++){ 4833 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4834 bufA = bufa+sstartsj[i]; 4835 for (j=0; j<nrows; j++) { 4836 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4837 for (ll=0; ll<sbs; ll++){ 4838 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4839 for (l=0; l<ncols; l++){ 4840 *bufA++ = vals[l]; 4841 } 4842 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4843 } 4844 } 4845 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4846 } 4847 /* recvs and sends of a-array are completed */ 4848 i = nrecvs; 4849 while (i--) { 4850 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4851 } 4852 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4853 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4854 4855 if (scall == MAT_INITIAL_MATRIX){ 4856 /* put together the new matrix */ 4857 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4858 4859 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4860 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4861 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4862 b_oth->free_a = PETSC_TRUE; 4863 b_oth->free_ij = PETSC_TRUE; 4864 b_oth->nonew = 0; 4865 4866 ierr = PetscFree(bufj);CHKERRQ(ierr); 4867 if (!startsj || !bufa_ptr){ 4868 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4869 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4870 } else { 4871 *startsj = sstartsj; 4872 *startsj_r = rstartsj; 4873 *bufa_ptr = bufa; 4874 } 4875 } 4876 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4877 PetscFunctionReturn(0); 4878 } 4879 4880 #undef __FUNCT__ 4881 #define __FUNCT__ "MatGetCommunicationStructs" 4882 /*@C 4883 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4884 4885 Not Collective 4886 4887 Input Parameters: 4888 . A - The matrix in mpiaij format 4889 4890 Output Parameter: 4891 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4892 . 
colmap - A map from global column index to local index into lvec 4893 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4894 4895 Level: developer 4896 4897 @*/ 4898 #if defined (PETSC_USE_CTABLE) 4899 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4900 #else 4901 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4902 #endif 4903 { 4904 Mat_MPIAIJ *a; 4905 4906 PetscFunctionBegin; 4907 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4908 PetscValidPointer(lvec, 2) 4909 PetscValidPointer(colmap, 3) 4910 PetscValidPointer(multScatter, 4) 4911 a = (Mat_MPIAIJ *) A->data; 4912 if (lvec) *lvec = a->lvec; 4913 if (colmap) *colmap = a->colmap; 4914 if (multScatter) *multScatter = a->Mvctx; 4915 PetscFunctionReturn(0); 4916 } 4917 4918 EXTERN_C_BEGIN 4919 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4920 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4921 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4922 EXTERN_C_END 4923 4924 #undef __FUNCT__ 4925 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4926 /* 4927 Computes (B'*A')' since computing B*A directly is untenable 4928 4929 n p p 4930 ( ) ( ) ( ) 4931 m ( A ) * n ( B ) = m ( C ) 4932 ( ) ( ) ( ) 4933 4934 */ 4935 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4936 { 4937 PetscErrorCode ierr; 4938 Mat At,Bt,Ct; 4939 4940 PetscFunctionBegin; 4941 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4942 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4943 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4944 ierr = MatDestroy(At);CHKERRQ(ierr); 4945 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4946 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4947 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4948 PetscFunctionReturn(0); 4949 } 4950 4951 #undef __FUNCT__ 4952 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4953 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4954 { 4955 PetscErrorCode ierr; 4956 PetscInt m=A->rmap->n,n=B->cmap->n; 4957 Mat Cmat; 4958 4959 PetscFunctionBegin; 4960 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4961 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4962 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4963 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4964 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4965 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4966 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4967 *C = Cmat; 4968 PetscFunctionReturn(0); 4969 } 4970 4971 /* ----------------------------------------------------------------*/ 4972 #undef __FUNCT__ 4973 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4974 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4975 { 4976 PetscErrorCode ierr; 4977 4978 PetscFunctionBegin; 4979 if (scall == MAT_INITIAL_MATRIX){ 4980 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4981 } 4982 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4983 PetscFunctionReturn(0); 4984 } 4985 4986 
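/*
   A minimal usage sketch (added for illustration, not part of PETSc): the three
   MatMatMult*_MPIDense_MPIAIJ() routines above are normally reached through the
   generic MatMatMult() interface, which locates them via the
   "MatMatMult_mpidense_mpiaij_C" function composed on the MPIAIJ matrix in
   MatCreate_MPIAIJ() below. The matrix names Adense, Baij, and C used here are
   hypothetical.

     Mat Adense,Baij,C;
     ...  create Adense as MATMPIDENSE and Baij as MATMPIAIJ with compatible sizes  ...
     ierr = MatMatMult(Adense,Baij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...  change entries of Adense and/or Baij without altering their layout        ...
     ierr = MatMatMult(Adense,Baij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/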
EXTERN_C_BEGIN 4987 #if defined(PETSC_HAVE_MUMPS) 4988 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4989 #endif 4990 #if defined(PETSC_HAVE_PASTIX) 4991 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4992 #endif 4993 #if defined(PETSC_HAVE_SUPERLU_DIST) 4994 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4995 #endif 4996 #if defined(PETSC_HAVE_SPOOLES) 4997 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4998 #endif 4999 EXTERN_C_END 5000 5001 /*MC 5002 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5003 5004 Options Database Keys: 5005 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5006 5007 Level: beginner 5008 5009 .seealso: MatCreateMPIAIJ() 5010 M*/ 5011 5012 EXTERN_C_BEGIN 5013 #undef __FUNCT__ 5014 #define __FUNCT__ "MatCreate_MPIAIJ" 5015 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5016 { 5017 Mat_MPIAIJ *b; 5018 PetscErrorCode ierr; 5019 PetscMPIInt size; 5020 5021 PetscFunctionBegin; 5022 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5023 5024 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5025 B->data = (void*)b; 5026 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5027 B->rmap->bs = 1; 5028 B->assembled = PETSC_FALSE; 5029 B->mapping = 0; 5030 5031 B->insertmode = NOT_SET_VALUES; 5032 b->size = size; 5033 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5034 5035 /* build cache for off array entries formed */ 5036 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5037 b->donotstash = PETSC_FALSE; 5038 b->colmap = 0; 5039 b->garray = 0; 5040 b->roworiented = PETSC_TRUE; 5041 5042 /* stuff used for matrix vector multiply */ 5043 b->lvec = PETSC_NULL; 5044 b->Mvctx = PETSC_NULL; 5045 5046 /* stuff for MatGetRow() */ 5047 b->rowindices = 0; 5048 b->rowvalues = 0; 5049 b->getrowactive = PETSC_FALSE; 5050 5051 #if defined(PETSC_HAVE_SPOOLES) 5052 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5053 "MatGetFactor_mpiaij_spooles", 5054 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5055 #endif 5056 #if defined(PETSC_HAVE_MUMPS) 5057 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5058 "MatGetFactor_mpiaij_mumps", 5059 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5060 #endif 5061 #if defined(PETSC_HAVE_PASTIX) 5062 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5063 "MatGetFactor_mpiaij_pastix", 5064 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5065 #endif 5066 #if defined(PETSC_HAVE_SUPERLU_DIST) 5067 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5068 "MatGetFactor_mpiaij_superlu_dist", 5069 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5070 #endif 5071 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5072 "MatStoreValues_MPIAIJ", 5073 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5075 "MatRetrieveValues_MPIAIJ", 5076 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5077 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5078 "MatGetDiagonalBlock_MPIAIJ", 5079 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5080 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5081 "MatIsTranspose_MPIAIJ", 
5082 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5083 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5084 "MatMPIAIJSetPreallocation_MPIAIJ", 5085 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5087 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5088 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5090 "MatDiagonalScaleLocal_MPIAIJ", 5091 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5092 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5093 "MatConvert_MPIAIJ_MPICSRPERM", 5094 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5096 "MatConvert_MPIAIJ_MPICRL", 5097 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5098 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5099 "MatConvert_MPIAIJ_MPISBAIJ", 5100 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5102 "MatMatMult_MPIDense_MPIAIJ", 5103 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5105 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5106 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5108 "MatMatMultNumeric_MPIDense_MPIAIJ", 5109 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5110 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5111 PetscFunctionReturn(0); 5112 } 5113 EXTERN_C_END 5114 5115 #undef __FUNCT__ 5116 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5117 /*@ 5118 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5119 and "off-diagonal" part of the matrix in CSR format. 5120 5121 Collective on MPI_Comm 5122 5123 Input Parameters: 5124 + comm - MPI communicator 5125 . m - number of local rows (Cannot be PETSC_DECIDE) 5126 . n - This value should be the same as the local size used in creating the 5127 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5128 it calculated if N is given) For square matrices n is almost always m. 5129 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5130 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5131 . i - row indices for "diagonal" portion of matrix 5132 . j - column indices 5133 . a - matrix values 5134 . oi - row indices for "off-diagonal" portion of matrix 5135 . oj - column indices 5136 - oa - matrix values 5137 5138 Output Parameter: 5139 . mat - the matrix 5140 5141 Level: advanced 5142 5143 Notes: 5144 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5145 5146 The i and j indices are 0 based 5147 5148 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5149 5150 This sets local rows and cannot be used to set off-processor values. 5151 5152 You cannot later use MatSetValues() to change values in this matrix. 
5153 5154 .keywords: matrix, aij, compressed row, sparse, parallel 5155 5156 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5157 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5158 @*/ 5159 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5160 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5161 { 5162 PetscErrorCode ierr; 5163 Mat_MPIAIJ *maij; 5164 5165 PetscFunctionBegin; 5166 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5167 if (i[0]) { 5168 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5169 } 5170 if (oi[0]) { 5171 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5172 } 5173 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5174 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5175 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5176 maij = (Mat_MPIAIJ*) (*mat)->data; 5177 maij->donotstash = PETSC_TRUE; 5178 (*mat)->preallocated = PETSC_TRUE; 5179 5180 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5181 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5182 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5183 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5184 5185 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5186 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5187 5188 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5189 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5190 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5191 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5192 5193 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5194 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5195 PetscFunctionReturn(0); 5196 } 5197 5198 /* 5199 Special version for direct calls from Fortran 5200 */ 5201 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5202 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5203 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5204 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5205 #endif 5206 5207 /* Change these macros so can be used in void function */ 5208 #undef CHKERRQ 5209 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5210 #undef SETERRQ2 5211 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5212 #undef SETERRQ 5213 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5214 5215 EXTERN_C_BEGIN 5216 #undef __FUNCT__ 5217 #define __FUNCT__ "matsetvaluesmpiaij_" 5218 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5219 { 5220 Mat mat = *mmat; 5221 PetscInt m = *mm, n = *mn; 5222 InsertMode addv = *maddv; 5223 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5224 PetscScalar value; 5225 PetscErrorCode ierr; 5226 5227 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5228 if (mat->insertmode == NOT_SET_VALUES) { 5229 mat->insertmode = addv; 5230 } 5231 #if defined(PETSC_USE_DEBUG) 5232 else if (mat->insertmode != addv) { 5233 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5234 } 5235 #endif 5236 { 5237 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5238 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5239 PetscTruth roworiented = aij->roworiented; 5240 5241 /* Some Variables required in the macro */ 5242 Mat A = aij->A; 5243 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5244 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5245 MatScalar *aa = a->a; 5246 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5247 Mat B = aij->B; 5248 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5249 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5250 MatScalar *ba = b->a; 5251 5252 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5253 PetscInt nonew = a->nonew; 5254 MatScalar *ap1,*ap2; 5255 5256 PetscFunctionBegin; 5257 for (i=0; i<m; i++) { 5258 if (im[i] < 0) continue; 5259 #if defined(PETSC_USE_DEBUG) 5260 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5261 #endif 5262 if (im[i] >= rstart && im[i] < rend) { 5263 row = im[i] - rstart; 5264 lastcol1 = -1; 5265 rp1 = aj + ai[row]; 5266 ap1 = aa + ai[row]; 5267 rmax1 = aimax[row]; 5268 nrow1 = ailen[row]; 5269 low1 = 0; 5270 high1 = nrow1; 5271 lastcol2 = -1; 5272 rp2 = bj + bi[row]; 5273 ap2 = ba + bi[row]; 5274 rmax2 = bimax[row]; 5275 nrow2 = bilen[row]; 5276 low2 = 0; 5277 high2 = nrow2; 5278 5279 for (j=0; j<n; j++) { 5280 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5281 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5282 if (in[j] >= cstart && in[j] < cend){ 5283 col = in[j] - cstart; 5284 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5285 } else if (in[j] < 0) continue; 5286 #if defined(PETSC_USE_DEBUG) 5287 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5288 #endif 5289 else { 5290 if (mat->was_assembled) { 5291 if (!aij->colmap) { 5292 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5293 } 5294 #if defined (PETSC_USE_CTABLE) 5295 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5296 col--; 5297 #else 5298 col = aij->colmap[in[j]] - 1; 5299 #endif 5300 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5301 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5302 col = in[j]; 5303 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5304 B = aij->B; 5305 b = (Mat_SeqAIJ*)B->data; 5306 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5307 rp2 = bj + bi[row]; 5308 ap2 = ba + bi[row]; 5309 rmax2 = bimax[row]; 5310 nrow2 = bilen[row]; 5311 low2 = 0; 5312 high2 = nrow2; 5313 bm = aij->B->rmap->n; 5314 ba = b->a; 5315 } 5316 } else col = in[j]; 5317 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5318 } 5319 } 5320 } else { 5321 if (!aij->donotstash) { 5322 if (roworiented) { 5323 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5324 } else { 5325 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5326 } 5327 } 5328 } 5329 }} 5330 PetscFunctionReturnVoid(); 5331 } 5332 EXTERN_C_END 5333 5334
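/*
   Illustrative sketch (added for documentation, not part of PETSc; compiled out
   by the #if 0 guard): the ownership test that matsetvaluesmpiaij_() above
   applies to each (row,column) pair. Values in locally owned rows go either into
   the "diagonal" block aij->A (when the column is also owned locally) or into
   the "off-diagonal" block aij->B; values in rows owned by another process are
   stashed and exchanged during assembly. The enum and helper name below are
   hypothetical.
*/
#if 0
typedef enum {ENTRY_LOCAL_DIAG,ENTRY_LOCAL_OFFDIAG,ENTRY_OFF_PROCESS} EntryKind;

static EntryKind ClassifyEntry(PetscInt row,PetscInt col,PetscInt rstart,PetscInt rend,PetscInt cstart,PetscInt cend)
{
  if (row < rstart || row >= rend) return ENTRY_OFF_PROCESS; /* stashed by MatStashValuesRow/Col_Private(), sent at assembly */
  if (col >= cstart && col < cend) return ENTRY_LOCAL_DIAG;  /* inserted into aij->A via MatSetValues_SeqAIJ_A_Private() */
  return ENTRY_LOCAL_OFFDIAG;                                /* inserted into aij->B via MatSetValues_SeqAIJ_B_Private() */
}
#endif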