#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                     ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

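/*
   Added usage sketch (not part of the original source): MatDistribute_MPIAIJ() is an
   internal helper; the SeqAIJ matrix gmat is only referenced on process 0 and every
   process passes its desired local row count m.  Illustrative call sequence, with
   "gmat" and "mlocal" assumed to be set up by the caller:

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ...  use dmat  ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are moved from process 0; the
   parallel nonzero structure built by the MAT_INITIAL_MATRIX call is kept.
*/
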
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

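/*
   Worked example of the garray/colmap relationship built above (added note, not in the
   original source).  If the off-diagonal block B on this process references global
   columns 3, 17 and 42, then

      garray[0..2] = {3, 17, 42}                       local column -> global column
      colmap[3] = 1, colmap[17] = 2, colmap[42] = 3    global column -> local column + 1 (array variant)
      all other colmap entries are 0

   so a lookup is "col = colmap[c] - 1" and col < 0 means global column c does not (yet)
   occur in B.  The PETSC_USE_CTABLE variant stores the same information with keys
   garray[i]+1 and values i+1 through PetscTableAdd()/PetscTableFind().
*/
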
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

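/*
   Added clarification (not in the original source): MatSetValuesRow_MPIAIJ() assumes the
   nonzero pattern of the global row is already in place and that v[] lists the values of
   that row by increasing global column, i.e.

      v = [ B entries left of the diagonal block | all A (diagonal block) entries | remaining B entries ]

   which is exactly the order of the three PetscMemcpy() calls above (l values into B,
   then the A row, then the rest of B).
*/
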
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

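/*
   Illustrative usage of the public interface that lands in MatSetValues_MPIAIJ() (added
   sketch, not in the original source; "mat", "row", "col" and "val" are assumed to be set
   up by the caller):

      ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Rows owned by this process go straight into the A (diagonal) or B (off-diagonal) block
   through the macros above; rows owned elsewhere are collected in mat->stash and shipped
   to their owners during assembly, unless MAT_IGNORE_OFF_PROC_ENTRIES was set.
*/
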
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

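/*
   Added note (not in the original source): the MPI_BOR reduction above is what detects an
   InsertMode that differs between processes, e.g.

      if (!rank) {ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);}
      else       {ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);}
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    fails with "Some processors inserted others added"

   because INSERT_VALUES and ADD_VALUES OR together to a value that matches neither mode.
*/
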
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, which would
     allocate new memory repeatedly, causing lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

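/*
   Illustrative usage (added, not in the original source): MatZeroRows() accepts rows owned
   by any process; the sends/receives above route each row index to its owner before the
   local blocks are zeroed.  With "nbc" and "bcrows" assumed to be provided by the caller:

      ierr = MatZeroRows(A,nbc,bcrows,1.0);CHKERRQ(ierr);    zero the rows but keep a unit diagonal
      ierr = MatZeroRows(A,nbc,bcrows,0.0);CHKERRQ(ierr);    zero the rows completely
*/
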
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

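/*
   Added note (not in the original source): the multiply routines above share one pattern so
   that communication of ghost values overlaps the purely local work; for y = A*x it is

      VecScatterBegin(Mvctx,x,lvec,...)    start gathering the off-process entries of x
      y  = (diagonal block A) * x          uses only local data
      VecScatterEnd(Mvctx,x,lvec,...)      finish the gather into lvec
      y += (off-diagonal block B) * lvec   uses the ghost values

   The transpose versions run the scatter in SCATTER_REVERSE with ADD_VALUES because each
   process then contributes to entries of the result owned by other processes.
*/
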
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest number of nonzeros on any processor */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

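/*
   Illustrative usage (added, not in the original source): the routine above is reached via
   MatView() with a binary viewer, and the resulting file can be read back with MatLoad():

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The file layout is the one assembled above: a four entry header (MAT_FILE_CLASSID, M, N,
   global nonzero count), then all row lengths, all column indices and all numerical values,
   each gathered to process 0 in rank order.
*/
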
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

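/*
   Added note (not in the original source): MatSOR_MPIAIJ() only provides "local" relaxation,
   i.e. each process sweeps its own diagonal block while coupling to other processes enters
   through the updated right-hand side bb1 = bb - B*x.  It is typically reached through PCSOR,
   for example

      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   Requesting a true global sweep (e.g. plain SOR_FORWARD_SWEEP on more than one process) ends
   up in the "Parallel SOR not supported" error above.
*/
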
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SPD: 1442 A->spd_set = PETSC_TRUE; 1443 A->spd = flg; 1444 if (flg) { 1445 A->symmetric = PETSC_TRUE; 1446 A->structurally_symmetric = PETSC_TRUE; 1447 A->symmetric_set = PETSC_TRUE; 1448 A->structurally_symmetric_set = PETSC_TRUE; 1449 } 1450 break; 1451 case MAT_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_STRUCTURALLY_SYMMETRIC: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_HERMITIAN: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 case MAT_SYMMETRY_ETERNAL: 1461 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1462 break; 1463 default: 1464 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatGetRow_MPIAIJ" 1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1475 PetscErrorCode ierr; 1476 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1477 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1478 PetscInt *cmap,*idx_p; 1479 1480 PetscFunctionBegin; 1481 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1482 mat->getrowactive = PETSC_TRUE; 1483 1484 if (!mat->rowvalues && (idx || v)) { 1485 /* 1486 allocate enough space to hold information from the longest row. 
1487 */ 1488 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1489 PetscInt max = 1,tmp; 1490 for (i=0; i<matin->rmap->n; i++) { 1491 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1492 if (max < tmp) { max = tmp; } 1493 } 1494 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1495 } 1496 1497 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1498 lrow = row - rstart; 1499 1500 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1501 if (!v) {pvA = 0; pvB = 0;} 1502 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1503 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1504 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1505 nztot = nzA + nzB; 1506 1507 cmap = mat->garray; 1508 if (v || idx) { 1509 if (nztot) { 1510 /* Sort by increasing column numbers, assuming A and B already sorted */ 1511 PetscInt imark = -1; 1512 if (v) { 1513 *v = v_p = mat->rowvalues; 1514 for (i=0; i<nzB; i++) { 1515 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1516 else break; 1517 } 1518 imark = i; 1519 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1520 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1521 } 1522 if (idx) { 1523 *idx = idx_p = mat->rowindices; 1524 if (imark > -1) { 1525 for (i=0; i<imark; i++) { 1526 idx_p[i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 for (i=0; i<nzB; i++) { 1530 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1531 else break; 1532 } 1533 imark = i; 1534 } 1535 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1536 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1537 } 1538 } else { 1539 if (idx) *idx = 0; 1540 if (v) *v = 0; 1541 } 1542 } 1543 *nz = nztot; 1544 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1545 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1552 { 1553 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1554 1555 PetscFunctionBegin; 1556 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1557 aij->getrowactive = PETSC_FALSE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatNorm_MPIAIJ" 1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1564 { 1565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1566 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,cstart = mat->cmap->rstart; 1569 PetscReal sum = 0.0; 1570 MatScalar *v; 1571 1572 PetscFunctionBegin; 1573 if (aij->size == 1) { 1574 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1575 } else { 1576 if (type == NORM_FROBENIUS) { 1577 v = amat->a; 1578 for (i=0; i<amat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 v = bmat->a; 1586 for (i=0; i<bmat->nz; i++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1589 #else 1590 sum += (*v)*(*v); v++; 1591 #endif 1592 } 1593 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1594 *norm = sqrt(*norm); 1595 } else if (type 
== NORM_1) { /* max column norm */ 1596 PetscReal *tmp,*tmp2; 1597 PetscInt *jj,*garray = aij->garray; 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1600 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1601 *norm = 0.0; 1602 v = amat->a; jj = amat->j; 1603 for (j=0; j<amat->nz; j++) { 1604 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1605 } 1606 v = bmat->a; jj = bmat->j; 1607 for (j=0; j<bmat->nz; j++) { 1608 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1609 } 1610 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1611 for (j=0; j<mat->cmap->N; j++) { 1612 if (tmp2[j] > *norm) *norm = tmp2[j]; 1613 } 1614 ierr = PetscFree(tmp);CHKERRQ(ierr); 1615 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1616 } else if (type == NORM_INFINITY) { /* max row norm */ 1617 PetscReal ntemp = 0.0; 1618 for (j=0; j<aij->A->rmap->n; j++) { 1619 v = amat->a + amat->i[j]; 1620 sum = 0.0; 1621 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1622 sum += PetscAbsScalar(*v); v++; 1623 } 1624 v = bmat->a + bmat->i[j]; 1625 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1626 sum += PetscAbsScalar(*v); v++; 1627 } 1628 if (sum > ntemp) ntemp = sum; 1629 } 1630 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1631 } else { 1632 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1633 } 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatTranspose_MPIAIJ" 1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1641 { 1642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1643 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1644 PetscErrorCode ierr; 1645 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1646 PetscInt cstart=A->cmap->rstart,ncol; 1647 Mat B; 1648 MatScalar *array; 1649 1650 PetscFunctionBegin; 1651 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1652 1653 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1654 ai = Aloc->i; aj = Aloc->j; 1655 bi = Bloc->i; bj = Bloc->j; 1656 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1657 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1658 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1659 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1660 for (i=0; i<ai[ma]; i++){ 1661 d_nnz[aj[i]] ++; 1662 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1663 } 1664 1665 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1666 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1667 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1670 } else { 1671 B = *matout; 1672 } 1673 1674 /* copy over the A part */ 1675 array = Aloc->a; 1676 row = A->rmap->rstart; 1677 for (i=0; i<ma; i++) { 1678 ncol = ai[i+1]-ai[i]; 1679 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1680 row++; array += ncol; aj += ncol; 1681 } 1682 aj = Aloc->j; 1683 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1684 1685 /* copy over the B part */ 1686 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1687 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1688 array = Bloc->a; 1689 row = A->rmap->rstart; 1690 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1691 cols_tmp = cols; 1692 for (i=0; i<mb; i++) { 1693 ncol = bi[i+1]-bi[i]; 1694 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; array += ncol; cols_tmp += ncol; 1696 } 1697 ierr = PetscFree(cols);CHKERRQ(ierr); 1698 1699 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1702 *matout = B; 1703 } else { 1704 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1714 Mat a = aij->A,b = aij->B; 1715 PetscErrorCode ierr; 1716 PetscInt s1,s2,s3; 1717 1718 PetscFunctionBegin; 1719 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1720 if (rr) { 1721 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1722 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1723 /* Overlap communication with computation. */ 1724 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 if (ll) { 1727 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1728 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1729 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1730 } 1731 /* scale the diagonal block */ 1732 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1733 1734 if (rr) { 1735 /* Do a scatter end and then right scale the off-diagonal block */ 1736 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1738 } 1739 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1746 { 1747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1752 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1760 { 1761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1762 PetscErrorCode ierr; 1763 1764 PetscFunctionBegin; 1765 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatEqual_MPIAIJ" 1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1772 { 1773 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1774 Mat a,b,c,d; 1775 PetscTruth flg; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 a = matA->A; b = matA->B; 1780 c = matB->A; d = matB->B; 1781 1782 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1783 if (flg) { 1784 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1785 } 1786 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1787 
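  /* The logical-AND reduction above makes the result collective: *flag is PETSC_TRUE
     only if both the diagonal and off-diagonal blocks match on every process, so all
     ranks return the same answer. */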
PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatCopy_MPIAIJ" 1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1793 { 1794 PetscErrorCode ierr; 1795 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1796 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 /* because of the column compression in the off-processor part of the matrix a->B, 1802 the number of columns in a->B and b->B may be different, hence we cannot call 1803 the MatCopy() directly on the two parts. If need be, we can provide a more 1804 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1805 then copying the submatrices */ 1806 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1807 } else { 1808 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1809 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1817 { 1818 PetscErrorCode ierr; 1819 1820 PetscFunctionBegin; 1821 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
    This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
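/* The symbolic factorization above stores the Boost ILU graph on the factor matrix via a
   PetscContainer composed under the key "graph"; MatSolve_MPIAIJ below retrieves it with
   PetscObjectQuery(). A minimal sketch of that hand-off pattern in isolation (illustrative
   only; 'thing' is a hypothetical payload, not something defined in this file):

     PetscContainer c;
     void           *thing;                 // payload to attach, obtained elsewhere
     ierr = PetscContainerCreate(((PetscObject)fact)->comm,&c);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(c,thing);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)fact,"graph",(PetscObject)c);CHKERRQ(ierr);
     // ... later, in the routine that needs the payload:
     ierr = PetscObjectQuery((PetscObject)fact,"graph",(PetscObject*)&c);CHKERRQ(ierr);
     ierr = PetscContainerGetPointer(c,&thing);CHKERRQ(ierr);
*/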
#define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for 
(l=0; l<nzB; l++) { 2227 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 
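  /* Communication pattern used here and above: post all MPI_Irecv's first, then the
     MPI_Isend's, and drain completions with MPI_Waitany so receives can be processed in
     whatever order they arrive; MPI_Waitall on the send requests guarantees the send
     buffers (sbuf_j, sbuf_a) are safe to reuse or free afterwards. */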
/*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 
2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->cmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 
MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/0, 2719 0, 2720 0, 2721 0 2722 }; 2723 2724 /* ----------------------------------------------------------------------------------------*/ 2725 2726 EXTERN_C_BEGIN 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2730 { 2731 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2736 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2737 PetscFunctionReturn(0); 2738 } 2739 EXTERN_C_END 2740 2741 EXTERN_C_BEGIN 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2745 { 2746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2751 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 EXTERN_C_END 2755 2756 EXTERN_C_BEGIN 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 PetscInt i; 2764 2765 PetscFunctionBegin; 
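  /* d_nz/d_nnz preallocate the diagonal block (columns owned by this process) and
     o_nz/o_nnz the off-diagonal block. Typical caller-side usage (illustrative sketch,
     not part of this file; A, M and N are placeholders):

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

     which dispatches to this routine. */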
2766 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2767 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2768 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2769 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2770 2771 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2773 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2774 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2775 if (d_nnz) { 2776 for (i=0; i<B->rmap->n; i++) { 2777 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2778 } 2779 } 2780 if (o_nnz) { 2781 for (i=0; i<B->rmap->n; i++) { 2782 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2783 } 2784 } 2785 b = (Mat_MPIAIJ*)B->data; 2786 2787 if (!B->preallocated) { 2788 /* Explicitly create 2 MATSEQAIJ matrices. */ 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2791 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2795 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2797 } 2798 2799 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2800 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2801 B->preallocated = PETSC_TRUE; 2802 PetscFunctionReturn(0); 2803 } 2804 EXTERN_C_END 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2809 { 2810 Mat mat; 2811 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 *newmat = 0; 2816 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2818 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2819 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2820 a = (Mat_MPIAIJ*)mat->data; 2821 2822 mat->factortype = matin->factortype; 2823 mat->rmap->bs = matin->rmap->bs; 2824 mat->assembled = PETSC_TRUE; 2825 mat->insertmode = NOT_SET_VALUES; 2826 mat->preallocated = PETSC_TRUE; 2827 2828 a->size = oldmat->size; 2829 a->rank = oldmat->rank; 2830 a->donotstash = oldmat->donotstash; 2831 a->roworiented = oldmat->roworiented; 2832 a->rowindices = 0; 2833 a->rowvalues = 0; 2834 a->getrowactive = PETSC_FALSE; 2835 2836 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2837 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2838 2839 if (oldmat->colmap) { 2840 #if defined (PETSC_USE_CTABLE) 2841 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2842 #else 2843 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2845 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2846 
#endif 2847 } else a->colmap = 0; 2848 if (oldmat->garray) { 2849 PetscInt len; 2850 len = oldmat->B->cmap->n; 2851 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2852 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2853 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2854 } else a->garray = 0; 2855 2856 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2858 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2860 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2862 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2863 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2864 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2865 *newmat = mat; 2866 PetscFunctionReturn(0); 2867 } 2868 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "MatLoad_MPIAIJ" 2871 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2872 { 2873 PetscScalar *vals,*svals; 2874 MPI_Comm comm = ((PetscObject)viewer)->comm; 2875 MPI_Status status; 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2878 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2879 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2880 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2881 PetscInt cend,cstart,n,*rowners,sizesset=1; 2882 int fd; 2883 2884 PetscFunctionBegin; 2885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2887 if (!rank) { 2888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2889 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2890 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2891 } 2892 2893 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2894 2895 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2896 M = header[1]; N = header[2]; 2897 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2898 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2899 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2900 2901 /* If global sizes are set, check if they are consistent with that given in the file */ 2902 if (sizesset) { 2903 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2904 } 2905 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2906 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2907 2908 /* determine ownership of all rows */ 2909 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2910 else m = newMat->rmap->n; /* Set by user */ 2911 2912 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2913 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2914 2915 /* First process needs enough room for process with most rows */ 2916 if (!rank) { 2917 mmax = rowners[1]; 
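      /* As the in-code comment above notes, process 0 sizes its work arrays for the
         process with the most rows (mmax). A few lines below, rowners[] is converted
         from per-process row counts into an ownership prefix sum; e.g. with three
         processes and local sizes {3,3,2}, the gathered counts become
         rowners = {0,3,6,8}, so process 1 owns global rows [3,6). */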
2918 for (i=2; i<size; i++) { 2919 mmax = PetscMax(mmax,rowners[i]); 2920 } 2921 } else mmax = m; 2922 2923 rowners[0] = 0; 2924 for (i=2; i<=size; i++) { 2925 rowners[i] += rowners[i-1]; 2926 } 2927 rstart = rowners[rank]; 2928 rend = rowners[rank+1]; 2929 2930 /* distribute row lengths to all processors */ 2931 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2932 if (!rank) { 2933 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2934 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2935 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2936 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2937 for (j=0; j<m; j++) { 2938 procsnz[0] += ourlens[j]; 2939 } 2940 for (i=1; i<size; i++) { 2941 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2942 /* calculate the number of nonzeros on each processor */ 2943 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2944 procsnz[i] += rowlengths[j]; 2945 } 2946 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2947 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2950 } else { 2951 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2952 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2953 } 2954 2955 if (!rank) { 2956 /* determine max buffer needed and allocate it */ 2957 maxnz = 0; 2958 for (i=0; i<size; i++) { 2959 maxnz = PetscMax(maxnz,procsnz[i]); 2960 } 2961 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2962 2963 /* read in my part of the matrix column indices */ 2964 nz = procsnz[0]; 2965 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2966 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2967 2968 /* read in every one elses and ship off */ 2969 for (i=1; i<size; i++) { 2970 nz = procsnz[i]; 2971 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2972 mpicnt = PetscMPIIntCast(nz); 2973 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2974 } 2975 ierr = PetscFree(cols);CHKERRQ(ierr); 2976 } else { 2977 /* determine buffer space needed for message */ 2978 nz = 0; 2979 for (i=0; i<m; i++) { 2980 nz += ourlens[i]; 2981 } 2982 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2983 2984 /* receive message of column indices*/ 2985 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2986 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2987 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2988 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 2989 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 2990 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 2991 } 2992 2993 /* determine column ownership if matrix is not square */ 2994 if (N != M) { 2995 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2996 else n = newMat->cmap->n; 2997 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2998 cstart = cend - n; 2999 } else { 3000 cstart = rstart; 3001 cend = rend; 3002 n = cend - cstart; 3003 } 3004 3005 /* loop over local rows, determining number of off diagonal entries */ 3006 ierr = 
PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3007 jj = 0; 3008 for (i=0; i<m; i++) { 3009 for (j=0; j<ourlens[i]; j++) { 3010 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3011 jj++; 3012 } 3013 } 3014 3015 for (i=0; i<m; i++) { 3016 ourlens[i] -= offlens[i]; 3017 } 3018 if (!sizesset) { 3019 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3020 } 3021 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3022 3023 for (i=0; i<m; i++) { 3024 ourlens[i] += offlens[i]; 3025 } 3026 3027 if (!rank) { 3028 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3029 3030 /* read in my part of the matrix numerical values */ 3031 nz = procsnz[0]; 3032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3033 3034 /* insert into matrix */ 3035 jj = rstart; 3036 smycols = mycols; 3037 svals = vals; 3038 for (i=0; i<m; i++) { 3039 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3040 smycols += ourlens[i]; 3041 svals += ourlens[i]; 3042 jj++; 3043 } 3044 3045 /* read in other processors and ship out */ 3046 for (i=1; i<size; i++) { 3047 nz = procsnz[i]; 3048 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3049 mpicnt = PetscMPIIntCast(nz); 3050 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3051 } 3052 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3053 } else { 3054 /* receive numeric values */ 3055 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3056 3057 /* receive message of values*/ 3058 mpicnt = PetscMPIIntCast(nz); 3059 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr); 3060 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3061 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3062 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3063 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3064 3065 /* insert into matrix */ 3066 jj = rstart; 3067 smycols = mycols; 3068 svals = vals; 3069 for (i=0; i<m; i++) { 3070 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3071 smycols += ourlens[i]; 3072 svals += ourlens[i]; 3073 jj++; 3074 } 3075 } 3076 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3077 ierr = PetscFree(vals);CHKERRQ(ierr); 3078 ierr = PetscFree(mycols);CHKERRQ(ierr); 3079 ierr = PetscFree(rowners);CHKERRQ(ierr); 3080 3081 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3082 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3088 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3089 { 3090 PetscErrorCode ierr; 3091 IS iscol_local; 3092 PetscInt csize; 3093 3094 PetscFunctionBegin; 3095 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3096 if (call == MAT_REUSE_MATRIX) { 3097 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3098 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3099 } else { 3100 ierr 
= ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3101 } 3102 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3103 if (call == MAT_INITIAL_MATRIX) { 3104 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3105 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3106 } 3107 PetscFunctionReturn(0); 3108 } 3109 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3112 /* 3113 Not great since it makes two copies of the submatrix, first an SeqAIJ 3114 in local and then by concatenating the local matrices the end result. 3115 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3116 3117 Note: This requires a sequential iscol with all indices. 3118 */ 3119 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3120 { 3121 PetscErrorCode ierr; 3122 PetscMPIInt rank,size; 3123 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3124 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3125 Mat *local,M,Mreuse; 3126 MatScalar *vwork,*aa; 3127 MPI_Comm comm = ((PetscObject)mat)->comm; 3128 Mat_SeqAIJ *aij; 3129 3130 3131 PetscFunctionBegin; 3132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3134 3135 if (call == MAT_REUSE_MATRIX) { 3136 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3137 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3138 local = &Mreuse; 3139 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3140 } else { 3141 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3142 Mreuse = *local; 3143 ierr = PetscFree(local);CHKERRQ(ierr); 3144 } 3145 3146 /* 3147 m - number of local rows 3148 n - number of columns (same on all processors) 3149 rstart - first row in new global matrix generated 3150 */ 3151 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3152 if (call == MAT_INITIAL_MATRIX) { 3153 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3154 ii = aij->i; 3155 jj = aij->j; 3156 3157 /* 3158 Determine the number of non-zeros in the diagonal and off-diagonal 3159 portions of the matrix in order to do correct preallocation 3160 */ 3161 3162 /* first get start and end of "diagonal" columns */ 3163 if (csize == PETSC_DECIDE) { 3164 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3165 if (mglobal == n) { /* square matrix */ 3166 nlocal = m; 3167 } else { 3168 nlocal = n/size + ((n % size) > rank); 3169 } 3170 } else { 3171 nlocal = csize; 3172 } 3173 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3174 rstart = rend - nlocal; 3175 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3176 3177 /* next, compute all the lengths */ 3178 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3179 olens = dlens + m; 3180 for (i=0; i<m; i++) { 3181 jend = ii[i+1] - ii[i]; 3182 olen = 0; 3183 dlen = 0; 3184 for (j=0; j<jend; j++) { 3185 if (*jj < rstart || *jj >= rend) olen++; 3186 else dlen++; 3187 jj++; 3188 } 3189 olens[i] = olen; 3190 dlens[i] = dlen; 3191 } 3192 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3193 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3194 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3195 ierr = 
MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3196 ierr = PetscFree(dlens);CHKERRQ(ierr); 3197 } else { 3198 PetscInt ml,nl; 3199 3200 M = *newmat; 3201 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3202 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3203 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3204 /* 3205 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3206 rather than the slower MatSetValues(). 3207 */ 3208 M->was_assembled = PETSC_TRUE; 3209 M->assembled = PETSC_FALSE; 3210 } 3211 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3212 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3213 ii = aij->i; 3214 jj = aij->j; 3215 aa = aij->a; 3216 for (i=0; i<m; i++) { 3217 row = rstart + i; 3218 nz = ii[i+1] - ii[i]; 3219 cwork = jj; jj += nz; 3220 vwork = aa; aa += nz; 3221 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3222 } 3223 3224 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3226 *newmat = M; 3227 3228 /* save submatrix used in processor for next request */ 3229 if (call == MAT_INITIAL_MATRIX) { 3230 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3231 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3232 } 3233 3234 PetscFunctionReturn(0); 3235 } 3236 3237 EXTERN_C_BEGIN 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3241 { 3242 PetscInt m,cstart, cend,j,nnz,i,d; 3243 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3244 const PetscInt *JJ; 3245 PetscScalar *values; 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3250 3251 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3252 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3253 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3254 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3255 m = B->rmap->n; 3256 cstart = B->cmap->rstart; 3257 cend = B->cmap->rend; 3258 rstart = B->rmap->rstart; 3259 3260 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3261 3262 #if defined(PETSC_USE_DEBUG) 3263 for (i=0; i<m; i++) { 3264 nnz = Ii[i+1]- Ii[i]; 3265 JJ = J + Ii[i]; 3266 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3267 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3268 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3269 } 3270 #endif 3271 3272 for (i=0; i<m; i++) { 3273 nnz = Ii[i+1]- Ii[i]; 3274 JJ = J + Ii[i]; 3275 nnz_max = PetscMax(nnz_max,nnz); 3276 d = 0; 3277 for (j=0; j<nnz; j++) { 3278 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3279 } 3280 d_nnz[i] = d; 3281 o_nnz[i] = nnz - d; 3282 } 3283 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3284 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3285 3286 if (v) values = (PetscScalar*)v; 3287 else { 3288 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3289 ierr =
PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3290 } 3291 3292 for (i=0; i<m; i++) { 3293 ii = i + rstart; 3294 nnz = Ii[i+1]- Ii[i]; 3295 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3296 } 3297 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3298 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3299 3300 if (!v) { 3301 ierr = PetscFree(values);CHKERRQ(ierr); 3302 } 3303 PetscFunctionReturn(0); 3304 } 3305 EXTERN_C_END 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3309 /*@ 3310 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3311 (the default parallel PETSc format). 3312 3313 Collective on MPI_Comm 3314 3315 Input Parameters: 3316 + B - the matrix 3317 . i - the indices into j for the start of each local row (starts with zero) 3318 . j - the column indices for each local row (starts with zero) 3319 - v - optional values in the matrix 3320 3321 Level: developer 3322 3323 Notes: 3324 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3325 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3326 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3327 3328 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3329 3330 The format which is used for the sparse matrix input, is equivalent to a 3331 row-major ordering.. i.e for the following matrix, the input data expected is 3332 as shown: 3333 3334 1 0 0 3335 2 0 3 P0 3336 ------- 3337 4 5 6 P1 3338 3339 Process0 [P0]: rows_owned=[0,1] 3340 i = {0,1,3} [size = nrow+1 = 2+1] 3341 j = {0,0,2} [size = nz = 6] 3342 v = {1,2,3} [size = nz = 6] 3343 3344 Process1 [P1]: rows_owned=[2] 3345 i = {0,3} [size = nrow+1 = 1+1] 3346 j = {0,1,2} [size = nz = 6] 3347 v = {4,5,6} [size = nz = 6] 3348 3349 .keywords: matrix, aij, compressed row, sparse, parallel 3350 3351 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3352 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3353 @*/ 3354 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3355 { 3356 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3357 3358 PetscFunctionBegin; 3359 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3360 if (f) { 3361 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3362 } 3363 PetscFunctionReturn(0); 3364 } 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3368 /*@C 3369 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3370 (the default parallel PETSc format). For good matrix assembly performance 3371 the user should preallocate the matrix storage by setting the parameters 3372 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3373 performance can be increased by more than a factor of 50. 3374 3375 Collective on MPI_Comm 3376 3377 Input Parameters: 3378 + A - the matrix 3379 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3380 (same value is used for all local rows) 3381 . 
d_nnz - array containing the number of nonzeros in the various rows of the 3382 DIAGONAL portion of the local submatrix (possibly different for each row) 3383 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3384 The size of this array is equal to the number of local rows, i.e 'm'. 3385 You must leave room for the diagonal entry even if it is zero. 3386 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3387 submatrix (same value is used for all local rows). 3388 - o_nnz - array containing the number of nonzeros in the various rows of the 3389 OFF-DIAGONAL portion of the local submatrix (possibly different for 3390 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3391 structure. The size of this array is equal to the number 3392 of local rows, i.e 'm'. 3393 3394 If the *_nnz parameter is given then the *_nz parameter is ignored 3395 3396 The AIJ format (also called the Yale sparse matrix format or 3397 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3398 storage. The stored row and column indices begin with zero. 3399 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3400 3401 The parallel matrix is partitioned such that the first m0 rows belong to 3402 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3403 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3404 3405 The DIAGONAL portion of the local submatrix of a processor can be defined 3406 as the submatrix which is obtained by extraction the part corresponding 3407 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3408 first row that belongs to the processor, and r2 is the last row belonging 3409 to the this processor. This is a square mxm matrix. The remaining portion 3410 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3411 3412 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3413 3414 You can call MatGetInfo() to get information on how effective the preallocation was; 3415 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3416 You can also run with the option -info and look for messages with the string 3417 malloc in them to see if additional memory allocation was needed. 3418 3419 Example usage: 3420 3421 Consider the following 8x8 matrix with 34 non-zero values, that is 3422 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3423 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3424 as follows: 3425 3426 .vb 3427 1 2 0 | 0 3 0 | 0 4 3428 Proc0 0 5 6 | 7 0 0 | 8 0 3429 9 0 10 | 11 0 0 | 12 0 3430 ------------------------------------- 3431 13 0 14 | 15 16 17 | 0 0 3432 Proc1 0 18 0 | 19 20 21 | 0 0 3433 0 0 0 | 22 23 0 | 24 0 3434 ------------------------------------- 3435 Proc2 25 26 27 | 0 0 28 | 29 0 3436 30 0 0 | 31 32 33 | 0 34 3437 .ve 3438 3439 This can be represented as a collection of submatrices as: 3440 3441 .vb 3442 A B C 3443 D E F 3444 G H I 3445 .ve 3446 3447 Where the submatrices A,B,C are owned by proc0, D,E,F are 3448 owned by proc1, G,H,I are owned by proc2. 3449 3450 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3451 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3452 The 'M','N' parameters are 8,8, and have the same values on all procs. 3453 3454 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3455 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3456 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3457 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3458 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3459 matrix, ans [DF] as another SeqAIJ matrix. 3460 3461 When d_nz, o_nz parameters are specified, d_nz storage elements are 3462 allocated for every row of the local diagonal submatrix, and o_nz 3463 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3464 One way to choose d_nz and o_nz is to use the max nonzerors per local 3465 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3466 In this case, the values of d_nz,o_nz are: 3467 .vb 3468 proc0 : dnz = 2, o_nz = 2 3469 proc1 : dnz = 3, o_nz = 2 3470 proc2 : dnz = 1, o_nz = 4 3471 .ve 3472 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3473 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3474 for proc3. i.e we are using 12+15+10=37 storage locations to store 3475 34 values. 3476 3477 When d_nnz, o_nnz parameters are specified, the storage is specified 3478 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3479 In the above case the values for d_nnz,o_nnz are: 3480 .vb 3481 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3482 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3483 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3484 .ve 3485 Here the space allocated is sum of all the above values i.e 34, and 3486 hence pre-allocation is perfect. 3487 3488 Level: intermediate 3489 3490 .keywords: matrix, aij, compressed row, sparse, parallel 3491 3492 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3493 MPIAIJ, MatGetInfo() 3494 @*/ 3495 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3496 { 3497 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3498 3499 PetscFunctionBegin; 3500 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3501 if (f) { 3502 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3503 } 3504 PetscFunctionReturn(0); 3505 } 3506 3507 #undef __FUNCT__ 3508 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3509 /*@ 3510 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3511 CSR format the local rows. 3512 3513 Collective on MPI_Comm 3514 3515 Input Parameters: 3516 + comm - MPI communicator 3517 . m - number of local rows (Cannot be PETSC_DECIDE) 3518 . n - This value should be the same as the local size used in creating the 3519 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3520 calculated if N is given) For square matrices n is almost always m. 3521 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3522 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3523 . i - row indices 3524 . j - column indices 3525 - a - matrix values 3526 3527 Output Parameter: 3528 . mat - the matrix 3529 3530 Level: intermediate 3531 3532 Notes: 3533 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3534 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3535 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
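   For instance, once each process has built its local CSR triple, the matrix can be created with a
   single call such as the following sketch (i, j, a, m and n are the parameters described above, and
   error handling follows the usual ierr/CHKERRQ pattern):

.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
   Because the arrays are copied, i, j and a may be freed or reused as soon as this call returns, and
   the matrix is freed with MatDestroy() when it is no longer needed.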
3536 3537 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3538 3539 The format which is used for the sparse matrix input, is equivalent to a 3540 row-major ordering.. i.e for the following matrix, the input data expected is 3541 as shown: 3542 3543 1 0 0 3544 2 0 3 P0 3545 ------- 3546 4 5 6 P1 3547 3548 Process0 [P0]: rows_owned=[0,1] 3549 i = {0,1,3} [size = nrow+1 = 2+1] 3550 j = {0,0,2} [size = nz = 6] 3551 v = {1,2,3} [size = nz = 6] 3552 3553 Process1 [P1]: rows_owned=[2] 3554 i = {0,3} [size = nrow+1 = 1+1] 3555 j = {0,1,2} [size = nz = 6] 3556 v = {4,5,6} [size = nz = 6] 3557 3558 .keywords: matrix, aij, compressed row, sparse, parallel 3559 3560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3561 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3562 @*/ 3563 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3564 { 3565 PetscErrorCode ierr; 3566 3567 PetscFunctionBegin; 3568 if (i[0]) { 3569 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3570 } 3571 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3572 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3573 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3574 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3575 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3576 PetscFunctionReturn(0); 3577 } 3578 3579 #undef __FUNCT__ 3580 #define __FUNCT__ "MatCreateMPIAIJ" 3581 /*@C 3582 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3583 (the default parallel PETSc format). For good matrix assembly performance 3584 the user should preallocate the matrix storage by setting the parameters 3585 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3586 performance can be increased by more than a factor of 50. 3587 3588 Collective on MPI_Comm 3589 3590 Input Parameters: 3591 + comm - MPI communicator 3592 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3593 This value should be the same as the local size used in creating the 3594 y vector for the matrix-vector product y = Ax. 3595 . n - This value should be the same as the local size used in creating the 3596 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3597 calculated if N is given) For square matrices n is almost always m. 3598 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3599 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3600 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3601 (same value is used for all local rows) 3602 . d_nnz - array containing the number of nonzeros in the various rows of the 3603 DIAGONAL portion of the local submatrix (possibly different for each row) 3604 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3605 The size of this array is equal to the number of local rows, i.e 'm'. 3606 You must leave room for the diagonal entry even if it is zero. 3607 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3608 submatrix (same value is used for all local rows). 
3609 - o_nnz - array containing the number of nonzeros in the various rows of the 3610 OFF-DIAGONAL portion of the local submatrix (possibly different for 3611 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3612 structure. The size of this array is equal to the number 3613 of local rows, i.e 'm'. 3614 3615 Output Parameter: 3616 . A - the matrix 3617 3618 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3619 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3620 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3621 3622 Notes: 3623 If the *_nnz parameter is given then the *_nz parameter is ignored 3624 3625 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3626 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3627 storage requirements for this matrix. 3628 3629 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3630 processor than it must be used on all processors that share the object for 3631 that argument. 3632 3633 The user MUST specify either the local or global matrix dimensions 3634 (possibly both). 3635 3636 The parallel matrix is partitioned across processors such that the 3637 first m0 rows belong to process 0, the next m1 rows belong to 3638 process 1, the next m2 rows belong to process 2 etc.. where 3639 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3640 values corresponding to [m x N] submatrix. 3641 3642 The columns are logically partitioned with the n0 columns belonging 3643 to 0th partition, the next n1 columns belonging to the next 3644 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3645 3646 The DIAGONAL portion of the local submatrix on any given processor 3647 is the submatrix corresponding to the rows and columns m,n 3648 corresponding to the given processor. i.e diagonal matrix on 3649 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3650 etc. The remaining portion of the local submatrix [m x (N-n)] 3651 constitute the OFF-DIAGONAL portion. The example below better 3652 illustrates this concept. 3653 3654 For a square global matrix we define each processor's diagonal portion 3655 to be its local rows and the corresponding columns (a square submatrix); 3656 each processor's off-diagonal portion encompasses the remainder of the 3657 local matrix (a rectangular submatrix). 3658 3659 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3660 3661 When calling this routine with a single process communicator, a matrix of 3662 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3663 type of communicator, use the construction mechanism: 3664 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3665 3666 By default, this format uses inodes (identical nodes) when possible. 3667 We search for consecutive rows with the same nonzero structure, thereby 3668 reusing matrix information to achieve increased efficiency. 3669 3670 Options Database Keys: 3671 + -mat_no_inode - Do not use inodes 3672 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3673 - -mat_aij_oneindex - Internally use indexing starting at 1 3674 rather than 0. Note that when calling MatSetValues(), 3675 the user still MUST index entries starting at 0! 3676 3677 3678 Example usage: 3679 3680 Consider the following 8x8 matrix with 34 non-zero values, that is 3681 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 3682 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3683 as follows: 3684 3685 .vb 3686 1 2 0 | 0 3 0 | 0 4 3687 Proc0 0 5 6 | 7 0 0 | 8 0 3688 9 0 10 | 11 0 0 | 12 0 3689 ------------------------------------- 3690 13 0 14 | 15 16 17 | 0 0 3691 Proc1 0 18 0 | 19 20 21 | 0 0 3692 0 0 0 | 22 23 0 | 24 0 3693 ------------------------------------- 3694 Proc2 25 26 27 | 0 0 28 | 29 0 3695 30 0 0 | 31 32 33 | 0 34 3696 .ve 3697 3698 This can be represented as a collection of submatrices as: 3699 3700 .vb 3701 A B C 3702 D E F 3703 G H I 3704 .ve 3705 3706 Where the submatrices A,B,C are owned by proc0, D,E,F are 3707 owned by proc1, G,H,I are owned by proc2. 3708 3709 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3710 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3711 The 'M','N' parameters are 8,8, and have the same values on all procs. 3712 3713 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3714 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3715 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3716 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3717 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3718 matrix, ans [DF] as another SeqAIJ matrix. 3719 3720 When d_nz, o_nz parameters are specified, d_nz storage elements are 3721 allocated for every row of the local diagonal submatrix, and o_nz 3722 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3723 One way to choose d_nz and o_nz is to use the max nonzerors per local 3724 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3725 In this case, the values of d_nz,o_nz are: 3726 .vb 3727 proc0 : dnz = 2, o_nz = 2 3728 proc1 : dnz = 3, o_nz = 2 3729 proc2 : dnz = 1, o_nz = 4 3730 .ve 3731 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3732 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3733 for proc3. i.e we are using 12+15+10=37 storage locations to store 3734 34 values. 3735 3736 When d_nnz, o_nnz parameters are specified, the storage is specified 3737 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3738 In the above case the values for d_nnz,o_nnz are: 3739 .vb 3740 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3741 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3742 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3743 .ve 3744 Here the space allocated is sum of all the above values i.e 34, and 3745 hence pre-allocation is perfect. 
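   For the example above each process makes the same call, passing its own local sizes and nonzero
   counts. On proc1, for instance, the call is sketched by the following, where the d_nnz/o_nnz values
   are exactly those listed above for proc1's three rows of the 8x8 matrix:

.vb
   Mat      A;
   PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Since the d_nnz/o_nnz arrays are given, the d_nz/o_nz arguments (here 0) are ignored.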
3746 3747 Level: intermediate 3748 3749 .keywords: matrix, aij, compressed row, sparse, parallel 3750 3751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3752 MPIAIJ, MatCreateMPIAIJWithArrays() 3753 @*/ 3754 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3755 { 3756 PetscErrorCode ierr; 3757 PetscMPIInt size; 3758 3759 PetscFunctionBegin; 3760 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3761 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3762 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3763 if (size > 1) { 3764 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3765 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3766 } else { 3767 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3768 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3769 } 3770 PetscFunctionReturn(0); 3771 } 3772 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3775 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3776 { 3777 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3778 3779 PetscFunctionBegin; 3780 *Ad = a->A; 3781 *Ao = a->B; 3782 *colmap = a->garray; 3783 PetscFunctionReturn(0); 3784 } 3785 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3788 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3789 { 3790 PetscErrorCode ierr; 3791 PetscInt i; 3792 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3793 3794 PetscFunctionBegin; 3795 if (coloring->ctype == IS_COLORING_GLOBAL) { 3796 ISColoringValue *allcolors,*colors; 3797 ISColoring ocoloring; 3798 3799 /* set coloring for diagonal portion */ 3800 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3801 3802 /* set coloring for off-diagonal portion */ 3803 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3804 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3805 for (i=0; i<a->B->cmap->n; i++) { 3806 colors[i] = allcolors[a->garray[i]]; 3807 } 3808 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3809 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3810 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3811 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3812 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3813 ISColoringValue *colors; 3814 PetscInt *larray; 3815 ISColoring ocoloring; 3816 3817 /* set coloring for diagonal portion */ 3818 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3819 for (i=0; i<a->A->cmap->n; i++) { 3820 larray[i] = i + A->cmap->rstart; 3821 } 3822 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3823 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3824 for (i=0; i<a->A->cmap->n; i++) { 3825 colors[i] = coloring->colors[larray[i]]; 3826 } 3827 ierr = PetscFree(larray);CHKERRQ(ierr); 3828 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3829 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3830 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3831 3832 /* set coloring for off-diagonal portion */ 3833 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3834 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3835 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3836 for (i=0; i<a->B->cmap->n; i++) { 3837 colors[i] = coloring->colors[larray[i]]; 3838 } 3839 ierr = PetscFree(larray);CHKERRQ(ierr); 3840 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3841 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3842 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3843 } else { 3844 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3845 } 3846 3847 PetscFunctionReturn(0); 3848 } 3849 3850 #if defined(PETSC_HAVE_ADIC) 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3853 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3854 { 3855 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3856 PetscErrorCode ierr; 3857 3858 PetscFunctionBegin; 3859 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3860 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3861 PetscFunctionReturn(0); 3862 } 3863 #endif 3864 3865 #undef __FUNCT__ 3866 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3867 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3868 { 3869 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3870 PetscErrorCode ierr; 3871 3872 PetscFunctionBegin; 3873 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3874 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3875 PetscFunctionReturn(0); 3876 } 3877 3878 #undef __FUNCT__ 3879 #define __FUNCT__ "MatMerge" 3880 /*@ 3881 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3882 matrices from each processor 3883 3884 Collective on MPI_Comm 3885 3886 Input Parameters: 3887 + comm - the communicators the parallel matrix will live on 3888 . inmat - the input sequential matrices 3889 . n - number of local columns (or PETSC_DECIDE) 3890 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3891 3892 Output Parameter: 3893 . outmat - the parallel matrix generated 3894 3895 Level: advanced 3896 3897 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
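   A typical use, in which every process has already assembled its rows as a sequential matrix, is
   sketched below; seqA stands for that local SeqAIJ matrix. Note that the implementation destroys the
   input matrix once its rows have been copied into the parallel result:

.vb
   Mat A;
   ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve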
3898 3899 @*/ 3900 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3901 { 3902 PetscErrorCode ierr; 3903 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3904 PetscInt *indx; 3905 PetscScalar *values; 3906 3907 PetscFunctionBegin; 3908 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3909 if (scall == MAT_INITIAL_MATRIX){ 3910 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3911 if (n == PETSC_DECIDE){ 3912 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3913 } 3914 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3915 rstart -= m; 3916 3917 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3918 for (i=0;i<m;i++) { 3919 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3920 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3921 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3922 } 3923 /* This routine will ONLY return MPIAIJ type matrix */ 3924 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3925 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3926 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3927 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3928 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3929 3930 } else if (scall == MAT_REUSE_MATRIX){ 3931 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3932 } else { 3933 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3934 } 3935 3936 for (i=0;i<m;i++) { 3937 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3938 Ii = i + rstart; 3939 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3940 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3941 } 3942 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3943 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3944 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3945 3946 PetscFunctionReturn(0); 3947 } 3948 3949 #undef __FUNCT__ 3950 #define __FUNCT__ "MatFileSplit" 3951 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3952 { 3953 PetscErrorCode ierr; 3954 PetscMPIInt rank; 3955 PetscInt m,N,i,rstart,nnz; 3956 size_t len; 3957 const PetscInt *indx; 3958 PetscViewer out; 3959 char *name; 3960 Mat B; 3961 const PetscScalar *values; 3962 3963 PetscFunctionBegin; 3964 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3965 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3966 /* Should this be the type of the diagonal block of A? 
*/ 3967 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3968 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3969 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3970 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3971 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3972 for (i=0;i<m;i++) { 3973 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3974 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3975 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3976 } 3977 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3978 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3979 3980 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3981 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3982 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3983 sprintf(name,"%s.%d",outfile,rank); 3984 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3985 ierr = PetscFree(name); 3986 ierr = MatView(B,out);CHKERRQ(ierr); 3987 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3988 ierr = MatDestroy(B);CHKERRQ(ierr); 3989 PetscFunctionReturn(0); 3990 } 3991 3992 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3993 #undef __FUNCT__ 3994 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3995 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3996 { 3997 PetscErrorCode ierr; 3998 Mat_Merge_SeqsToMPI *merge; 3999 PetscContainer container; 4000 4001 PetscFunctionBegin; 4002 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4003 if (container) { 4004 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4005 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4006 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4007 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4008 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4009 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4010 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4011 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4012 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4013 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4014 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4015 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4016 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4017 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4018 4019 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4020 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4021 } 4022 ierr = PetscFree(merge);CHKERRQ(ierr); 4023 4024 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4025 PetscFunctionReturn(0); 4026 } 4027 4028 #include "../src/mat/utils/freespace.h" 4029 #include "petscbt.h" 4030 4031 #undef __FUNCT__ 4032 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4033 /*@C 4034 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4035 matrices from each processor 4036 4037 Collective on MPI_Comm 4038 4039 Input Parameters: 4040 + comm - the communicators the parallel matrix will live on 4041 . seqmat - the input sequential matrices 4042 . m - number of local rows (or PETSC_DECIDE) 4043 . n - number of local columns (or PETSC_DECIDE) 4044 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4045 4046 Output Parameter: 4047 . mpimat - the parallel matrix generated 4048 4049 Level: advanced 4050 4051 Notes: 4052 The dimensions of the sequential matrix in each processor MUST be the same. 
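   A sketch of a typical calling sequence, using the parameter names above, is:

.vb
   ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
   ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve
   The first call builds the parallel nonzero structure; the second, made after the numerical values of
   seqmat have been changed (with its nonzero pattern kept fixed), only communicates and inserts the
   new values.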
4053 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4054 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4055 @*/ 4056 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4057 { 4058 PetscErrorCode ierr; 4059 MPI_Comm comm=((PetscObject)mpimat)->comm; 4060 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4061 PetscMPIInt size,rank,taga,*len_s; 4062 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4063 PetscInt proc,m; 4064 PetscInt **buf_ri,**buf_rj; 4065 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4066 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4067 MPI_Request *s_waits,*r_waits; 4068 MPI_Status *status; 4069 MatScalar *aa=a->a; 4070 MatScalar **abuf_r,*ba_i; 4071 Mat_Merge_SeqsToMPI *merge; 4072 PetscContainer container; 4073 4074 PetscFunctionBegin; 4075 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4076 4077 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4078 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4079 4080 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4081 if (container) { 4082 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4083 } 4084 bi = merge->bi; 4085 bj = merge->bj; 4086 buf_ri = merge->buf_ri; 4087 buf_rj = merge->buf_rj; 4088 4089 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4090 owners = merge->rowmap->range; 4091 len_s = merge->len_s; 4092 4093 /* send and recv matrix values */ 4094 /*-----------------------------*/ 4095 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4096 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4097 4098 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4099 for (proc=0,k=0; proc<size; proc++){ 4100 if (!len_s[proc]) continue; 4101 i = owners[proc]; 4102 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4103 k++; 4104 } 4105 4106 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4107 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4108 ierr = PetscFree(status);CHKERRQ(ierr); 4109 4110 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4111 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4112 4113 /* insert mat values of mpimat */ 4114 /*----------------------------*/ 4115 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4116 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4117 4118 for (k=0; k<merge->nrecv; k++){ 4119 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4120 nrows = *(buf_ri_k[k]); 4121 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4122 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4123 } 4124 4125 /* set values of ba */ 4126 m = merge->rowmap->n; 4127 for (i=0; i<m; i++) { 4128 arow = owners[rank] + i; 4129 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4130 bnzi = bi[i+1] - bi[i]; 4131 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4132 4133 /* add local non-zero vals of this proc's seqmat into ba */ 4134 anzi = ai[arow+1] - ai[arow]; 4135 aj = a->j + ai[arow]; 4136 aa = a->a + ai[arow]; 4137 nextaj = 0; 4138 for (j=0; nextaj<anzi; j++){ 4139 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4140 ba_i[j] += aa[nextaj++]; 4141 } 4142 } 4143 4144 /* add received vals into ba */ 4145 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4146 /* i-th row */ 4147 if (i == *nextrow[k]) { 4148 anzi = *(nextai[k]+1) - *nextai[k]; 4149 aj = buf_rj[k] + *(nextai[k]); 4150 aa = abuf_r[k] + *(nextai[k]); 4151 nextaj = 0; 4152 for (j=0; nextaj<anzi; j++){ 4153 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4154 ba_i[j] += aa[nextaj++]; 4155 } 4156 } 4157 nextrow[k]++; nextai[k]++; 4158 } 4159 } 4160 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4161 } 4162 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4163 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4164 4165 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4166 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4167 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4168 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4169 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4170 PetscFunctionReturn(0); 4171 } 4172 4173 #undef __FUNCT__ 4174 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4175 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4176 { 4177 PetscErrorCode ierr; 4178 Mat B_mpi; 4179 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4180 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4181 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4182 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4183 PetscInt len,proc,*dnz,*onz; 4184 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4185 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4186 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4187 MPI_Status *status; 4188 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4189 PetscBT lnkbt; 4190 Mat_Merge_SeqsToMPI *merge; 4191 PetscContainer container; 4192 4193 PetscFunctionBegin; 4194 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4195 4196 /* make sure it is a PETSc comm */ 4197 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4199 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4200 4201 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4202 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4203 4204 /* determine row ownership */ 4205 /*---------------------------------------------------------*/ 4206 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4207 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4208 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4209 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4210 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4211 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4212 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4213 4214 m = merge->rowmap->n; 4215 M = merge->rowmap->N; 4216 owners = merge->rowmap->range; 4217 4218 /* determine the number of messages to send, their lengths */ 4219 /*---------------------------------------------------------*/ 4220 len_s = merge->len_s; 4221 4222 len = 0; /* length of buf_si[] */ 4223 merge->nsend = 0; 4224 for (proc=0; proc<size; proc++){ 4225 len_si[proc] = 0; 4226 if (proc == rank){ 4227 len_s[proc] = 0; 4228 } else { 4229 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4230 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4231 } 4232 if (len_s[proc]) { 4233 merge->nsend++; 4234 nrows = 0; 4235 for (i=owners[proc]; i<owners[proc+1]; i++){ 4236 if (ai[i+1] > ai[i]) nrows++; 4237 } 4238 len_si[proc] = 2*(nrows+1); 4239 len += len_si[proc]; 4240 } 4241 } 4242 4243 /* determine the number and length of messages to receive for ij-structure */ 4244 /*-------------------------------------------------------------------------*/ 4245 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4246 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4247 4248 /* post the Irecv of j-structure */ 4249 /*-------------------------------*/ 4250 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4251 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4252 4253 /* post the Isend of j-structure */ 4254 /*--------------------------------*/ 4255 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4256 4257 for (proc=0, k=0; proc<size; proc++){ 4258 if (!len_s[proc]) continue; 4259 i = owners[proc]; 4260 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4261 k++; 4262 } 4263 4264 /* receives and sends of j-structure are complete */ 4265 /*------------------------------------------------*/ 4266 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4267 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4268 4269 /* send and recv i-structure */ 4270 /*---------------------------*/ 4271 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4272 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4273 4274 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4275 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4276 for (proc=0,k=0; proc<size; proc++){ 4277 if (!len_s[proc]) continue; 4278 /* form outgoing message for i-structure: 4279 buf_si[0]: nrows to be sent 4280 [1:nrows]: row index (global) 4281 [nrows+1:2*nrows+1]: i-structure index 4282 */ 4283 /*-------------------------------------------*/ 4284 nrows = len_si[proc]/2 - 1; 4285 buf_si_i = buf_si + nrows+1; 4286 buf_si[0] = nrows; 4287 buf_si_i[0] = 0; 4288 nrows = 0; 4289 for (i=owners[proc]; i<owners[proc+1]; i++){ 4290 anzi = ai[i+1] - ai[i]; 4291 if (anzi) { 4292 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4293 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4294 nrows++; 4295 } 4296 } 4297 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4298 k++; 4299 buf_si += len_si[proc]; 4300 } 4301 4302 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4303 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4304 4305 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4306 for (i=0; i<merge->nrecv; i++){ 4307 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4308 } 4309 4310 ierr = PetscFree(len_si);CHKERRQ(ierr); 4311 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4312 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4313 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4314 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4315 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4316 ierr = PetscFree(status);CHKERRQ(ierr); 4317 4318 /* compute a local seq matrix in each processor */ 4319 /*----------------------------------------------*/ 4320 /* allocate bi array and free space for accumulating nonzero column info */ 4321 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4322 bi[0] = 0; 4323 4324 /* create and initialize a linked list */ 4325 nlnk = N+1; 4326 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4327 4328 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4329 len = 0; 4330 len = ai[owners[rank+1]] - ai[owners[rank]]; 4331 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4332 current_space = free_space; 4333 4334 /* determine symbolic info for each local row */ 4335 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4336 4337 for (k=0; k<merge->nrecv; k++){ 4338 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4339 nrows = *buf_ri_k[k]; 4340 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4341 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4342 } 4343 4344 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4345 len = 0; 4346 for (i=0;i<m;i++) { 4347 bnzi = 0; 4348 /* add local non-zero cols of this proc's seqmat into lnk */ 4349 arow = owners[rank] + i; 4350 anzi = ai[arow+1] - ai[arow]; 4351 aj = a->j + ai[arow]; 4352 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4353 bnzi += nlnk; 4354 /* add received col data into lnk */ 4355 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4356 if (i == *nextrow[k]) { /* i-th row */ 4357 anzi = *(nextai[k]+1) - *nextai[k]; 4358 aj = buf_rj[k] + *nextai[k]; 4359 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4360 bnzi += nlnk; 4361 nextrow[k]++; nextai[k]++; 4362 } 4363 } 4364 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4365 4366 /* if free space is not available, make more free space */ 4367 if (current_space->local_remaining<bnzi) { 4368 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4369 nspacedouble++; 4370 } 4371 /* copy data into free space, then initialize lnk */ 4372 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4373 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4374 4375 current_space->array += bnzi; 4376 current_space->local_used += bnzi; 4377 current_space->local_remaining -= bnzi; 4378 4379 bi[i+1] = bi[i] + bnzi; 4380 } 4381 4382 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4383 4384 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4385 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4386 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4387 4388 /* create symbolic parallel matrix B_mpi */ 4389 /*---------------------------------------*/ 4390 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4391 if (n==PETSC_DECIDE) { 4392 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4393 } else { 4394 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4395 } 4396 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4397 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4398 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4399 4400 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4401 B_mpi->assembled = PETSC_FALSE; 4402 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4403 merge->bi = bi; 4404 merge->bj = bj; 4405 merge->buf_ri = buf_ri; 4406 merge->buf_rj = buf_rj; 4407 merge->coi = PETSC_NULL; 4408 merge->coj = PETSC_NULL; 4409 merge->owners_co = PETSC_NULL; 4410 4411 /* attach the supporting struct to B_mpi for reuse */ 4412 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4413 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4414 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4415 *mpimat = B_mpi; 4416 4417 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4418 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4419 PetscFunctionReturn(0); 4420 } 4421 4422 #undef __FUNCT__ 4423 #define __FUNCT__ "MatMerge_SeqsToMPI" 4424 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4425 { 4426 PetscErrorCode ierr; 4427 4428 PetscFunctionBegin; 4429 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4430 if (scall == MAT_INITIAL_MATRIX){ 4431 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4432 } 4433 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4434 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4435 PetscFunctionReturn(0); 4436 } 4437 4438 #undef __FUNCT__ 4439 #define __FUNCT__ "MatGetLocalMat" 4440 /*@ 4441 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4442 4443 Not Collective 4444 4445 Input Parameters: 4446 + A - the matrix 4447 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4448 4449 Output Parameter: 4450 . 
A_loc - the local sequential matrix generated 4451 4452 Level: developer 4453 4454 @*/ 4455 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4456 { 4457 PetscErrorCode ierr; 4458 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4459 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4460 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4461 MatScalar *aa=a->a,*ba=b->a,*cam; 4462 PetscScalar *ca; 4463 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4464 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4465 4466 PetscFunctionBegin; 4467 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4468 if (scall == MAT_INITIAL_MATRIX){ 4469 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4470 ci[0] = 0; 4471 for (i=0; i<am; i++){ 4472 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4473 } 4474 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4475 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4476 k = 0; 4477 for (i=0; i<am; i++) { 4478 ncols_o = bi[i+1] - bi[i]; 4479 ncols_d = ai[i+1] - ai[i]; 4480 /* off-diagonal portion of A */ 4481 for (jo=0; jo<ncols_o; jo++) { 4482 col = cmap[*bj]; 4483 if (col >= cstart) break; 4484 cj[k] = col; bj++; 4485 ca[k++] = *ba++; 4486 } 4487 /* diagonal portion of A */ 4488 for (j=0; j<ncols_d; j++) { 4489 cj[k] = cstart + *aj++; 4490 ca[k++] = *aa++; 4491 } 4492 /* off-diagonal portion of A */ 4493 for (j=jo; j<ncols_o; j++) { 4494 cj[k] = cmap[*bj++]; 4495 ca[k++] = *ba++; 4496 } 4497 } 4498 /* put together the new matrix */ 4499 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4500 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4501 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4502 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4503 mat->free_a = PETSC_TRUE; 4504 mat->free_ij = PETSC_TRUE; 4505 mat->nonew = 0; 4506 } else if (scall == MAT_REUSE_MATRIX){ 4507 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4508 ci = mat->i; cj = mat->j; cam = mat->a; 4509 for (i=0; i<am; i++) { 4510 /* off-diagonal portion of A */ 4511 ncols_o = bi[i+1] - bi[i]; 4512 for (jo=0; jo<ncols_o; jo++) { 4513 col = cmap[*bj]; 4514 if (col >= cstart) break; 4515 *cam++ = *ba++; bj++; 4516 } 4517 /* diagonal portion of A */ 4518 ncols_d = ai[i+1] - ai[i]; 4519 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4520 /* off-diagonal portion of A */ 4521 for (j=jo; j<ncols_o; j++) { 4522 *cam++ = *ba++; bj++; 4523 } 4524 } 4525 } else { 4526 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4527 } 4528 4529 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4530 PetscFunctionReturn(0); 4531 } 4532 4533 #undef __FUNCT__ 4534 #define __FUNCT__ "MatGetLocalMatCondensed" 4535 /*@C 4536 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4537 4538 Not Collective 4539 4540 Input Parameters: 4541 + A - the matrix 4542 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4543 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4544 4545 Output Parameter: 4546 . 
A_loc - the local sequential matrix generated 4547 4548 Level: developer 4549 4550 @*/ 4551 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4552 { 4553 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4554 PetscErrorCode ierr; 4555 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4556 IS isrowa,iscola; 4557 Mat *aloc; 4558 4559 PetscFunctionBegin; 4560 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4561 if (!row){ 4562 start = A->rmap->rstart; end = A->rmap->rend; 4563 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4564 } else { 4565 isrowa = *row; 4566 } 4567 if (!col){ 4568 start = A->cmap->rstart; 4569 cmap = a->garray; 4570 nzA = a->A->cmap->n; 4571 nzB = a->B->cmap->n; 4572 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4573 ncols = 0; 4574 for (i=0; i<nzB; i++) { 4575 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4576 else break; 4577 } 4578 imark = i; 4579 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4580 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4581 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4582 ierr = PetscFree(idx);CHKERRQ(ierr); 4583 } else { 4584 iscola = *col; 4585 } 4586 if (scall != MAT_INITIAL_MATRIX){ 4587 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4588 aloc[0] = *A_loc; 4589 } 4590 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4591 *A_loc = aloc[0]; 4592 ierr = PetscFree(aloc);CHKERRQ(ierr); 4593 if (!row){ 4594 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4595 } 4596 if (!col){ 4597 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4598 } 4599 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4600 PetscFunctionReturn(0); 4601 } 4602 4603 #undef __FUNCT__ 4604 #define __FUNCT__ "MatGetBrowsOfAcols" 4605 /*@C 4606 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4607 4608 Collective on Mat 4609 4610 Input Parameters: 4611 + A,B - the matrices in mpiaij format 4612 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4613 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4614 4615 Output Parameter: 4616 + rowb, colb - index sets of rows and columns of B to extract 4617 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4618 - B_seq - the sequential matrix generated 4619 4620 Level: developer 4621 4622 @*/ 4623 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4624 { 4625 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4626 PetscErrorCode ierr; 4627 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4628 IS isrowb,iscolb; 4629 Mat *bseq; 4630 4631 PetscFunctionBegin; 4632 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4633 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4634 } 4635 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4636 4637 if (scall == MAT_INITIAL_MATRIX){ 4638 start = A->cmap->rstart; 4639 cmap = a->garray; 4640 nzA = a->A->cmap->n; 4641 nzB = a->B->cmap->n; 4642 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4643 ncols = 0; 4644 for (i=0; i<nzB; i++) { /* row < local row index */ 4645 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4646 else break; 4647 } 4648 imark = i; 4649 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4650 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4651 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4652 ierr = PetscFree(idx);CHKERRQ(ierr); 4653 *brstart = imark; 4654 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4655 } else { 4656 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4657 isrowb = *rowb; iscolb = *colb; 4658 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4659 bseq[0] = *B_seq; 4660 } 4661 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4662 *B_seq = bseq[0]; 4663 ierr = PetscFree(bseq);CHKERRQ(ierr); 4664 if (!rowb){ 4665 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4666 } else { 4667 *rowb = isrowb; 4668 } 4669 if (!colb){ 4670 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4671 } else { 4672 *colb = iscolb; 4673 } 4674 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4675 PetscFunctionReturn(0); 4676 } 4677 4678 #undef __FUNCT__ 4679 #define __FUNCT__ "MatGetBrowsOfAoCols" 4680 /*@C 4681 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4682 of the OFF-DIAGONAL portion of local A 4683 4684 Collective on Mat 4685 4686 Input Parameters: 4687 + A,B - the matrices in mpiaij format 4688 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4689 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4690 . 
startsj_r - similar to startsj for receives 4691 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4692 4693 Output Parameter: 4694 + B_oth - the sequential matrix generated 4695 4696 Level: developer 4697 4698 @*/ 4699 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4700 { 4701 VecScatter_MPI_General *gen_to,*gen_from; 4702 PetscErrorCode ierr; 4703 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4704 Mat_SeqAIJ *b_oth; 4705 VecScatter ctx=a->Mvctx; 4706 MPI_Comm comm=((PetscObject)ctx)->comm; 4707 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4708 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4709 PetscScalar *rvalues,*svalues; 4710 MatScalar *b_otha,*bufa,*bufA; 4711 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4712 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4713 MPI_Status *sstatus,rstatus; 4714 PetscMPIInt jj; 4715 PetscInt *cols,sbs,rbs; 4716 PetscScalar *vals; 4717 4718 PetscFunctionBegin; 4719 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4720 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4721 } 4722 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4723 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4724 4725 gen_to = (VecScatter_MPI_General*)ctx->todata; 4726 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4727 rvalues = gen_from->values; /* holds the length of receiving row */ 4728 svalues = gen_to->values; /* holds the length of sending row */ 4729 nrecvs = gen_from->n; 4730 nsends = gen_to->n; 4731 4732 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4733 srow = gen_to->indices; /* local row index to be sent */ 4734 sstarts = gen_to->starts; 4735 sprocs = gen_to->procs; 4736 sstatus = gen_to->sstatus; 4737 sbs = gen_to->bs; 4738 rstarts = gen_from->starts; 4739 rprocs = gen_from->procs; 4740 rbs = gen_from->bs; 4741 4742 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4743 if (scall == MAT_INITIAL_MATRIX){ 4744 /* i-array */ 4745 /*---------*/ 4746 /* post receives */ 4747 for (i=0; i<nrecvs; i++){ 4748 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4749 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4750 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4751 } 4752 4753 /* pack the outgoing message */ 4754 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4755 sstartsj[0] = 0; rstartsj[0] = 0; 4756 len = 0; /* total length of j or a array to be sent */ 4757 k = 0; 4758 for (i=0; i<nsends; i++){ 4759 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4760 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4761 for (j=0; j<nrows; j++) { 4762 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4763 for (l=0; l<sbs; l++){ 4764 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4765 rowlen[j*sbs+l] = ncols; 4766 len += ncols; 4767 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4768 } 4769 k++; 4770 } 4771 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4772 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4867 } 4868 } 4869 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4870 } 4871 /* recvs and sends of a-array are completed */ 4872 i = nrecvs; 4873 while (i--) { 4874 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4875 } 4876 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4877 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4878 4879 if (scall == MAT_INITIAL_MATRIX){ 4880 /* put together the new matrix */ 4881 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4882 4883 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4884 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4885 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4886 b_oth->free_a = PETSC_TRUE; 4887 b_oth->free_ij = PETSC_TRUE; 4888 b_oth->nonew = 0; 4889 4890 ierr = PetscFree(bufj);CHKERRQ(ierr); 4891 if (!startsj || !bufa_ptr){ 4892 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4893 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4894 } else { 4895 *startsj = sstartsj; 4896 *startsj_r = rstartsj; 4897 *bufa_ptr = bufa; 4898 } 4899 } 4900 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4901 PetscFunctionReturn(0); 4902 } 4903 4904 #undef __FUNCT__ 4905 #define __FUNCT__ "MatGetCommunicationStructs" 4906 /*@C 4907 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4908 4909 Not Collective 4910 4911 Input Parameters: 4912 . A - The matrix in mpiaij format 4913 4914 Output Parameter: 4915 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4916 . 
colmap - A map from global column index to local index into lvec 4917 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4918 4919 Level: developer 4920 4921 @*/ 4922 #if defined (PETSC_USE_CTABLE) 4923 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4924 #else 4925 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4926 #endif 4927 { 4928 Mat_MPIAIJ *a; 4929 4930 PetscFunctionBegin; 4931 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4932 PetscValidPointer(lvec, 2); 4933 PetscValidPointer(colmap, 3); 4934 PetscValidPointer(multScatter, 4); 4935 a = (Mat_MPIAIJ *) A->data; 4936 if (lvec) *lvec = a->lvec; 4937 if (colmap) *colmap = a->colmap; 4938 if (multScatter) *multScatter = a->Mvctx; 4939 PetscFunctionReturn(0); 4940 } 4941 4942 EXTERN_C_BEGIN 4943 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4944 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4945 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4946 EXTERN_C_END 4947 4948 #undef __FUNCT__ 4949 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4950 /* 4951 Computes (B'*A')' since computing B*A directly is untenable 4952 4953 n p p 4954 ( ) ( ) ( ) 4955 m ( A ) * n ( B ) = m ( C ) 4956 ( ) ( ) ( ) 4957 4958 */ 4959 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4960 { 4961 PetscErrorCode ierr; 4962 Mat At,Bt,Ct; 4963 4964 PetscFunctionBegin; 4965 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4966 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4967 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4968 ierr = MatDestroy(At);CHKERRQ(ierr); 4969 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4970 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4971 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4972 PetscFunctionReturn(0); 4973 } 4974 4975 #undef __FUNCT__ 4976 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4977 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4978 { 4979 PetscErrorCode ierr; 4980 PetscInt m=A->rmap->n,n=B->cmap->n; 4981 Mat Cmat; 4982 4983 PetscFunctionBegin; 4984 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4985 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4986 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4987 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4988 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4989 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4990 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4991 *C = Cmat; 4992 PetscFunctionReturn(0); 4993 } 4994 4995 /* ----------------------------------------------------------------*/ 4996 #undef __FUNCT__ 4997 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4998 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4999 { 5000 PetscErrorCode ierr; 5001 5002 PetscFunctionBegin; 5003 if (scall == MAT_INITIAL_MATRIX){ 5004 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5005 } 5006 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5007 PetscFunctionReturn(0); 5008 } 5009 
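/*
   Illustrative usage sketch (not part of the library source; Adense, Baij and C below are
   placeholder names): the dense*sparse product implemented by the three routines above is
   reached through the generic MatMatMult() interface; MatMatMult_MPIDense_MPIAIJ() is
   composed on MPIAIJ matrices in MatCreate_MPIAIJ() below so that the generic interface
   can locate it.

     Mat C;
     ierr = MatMatMult(Adense,Baij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...  use C, an MPIDENSE matrix with the row layout of Adense and column layout of Baij ...
     ierr = MatDestroy(C);CHKERRQ(ierr);

   The numeric routine forms (B' * A')' with two extra MatTranspose() calls because no
   direct MPIDense-times-MPIAIJ kernel is implemented here.
*/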
5010 EXTERN_C_BEGIN 5011 #if defined(PETSC_HAVE_MUMPS) 5012 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5013 #endif 5014 #if defined(PETSC_HAVE_PASTIX) 5015 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5016 #endif 5017 #if defined(PETSC_HAVE_SUPERLU_DIST) 5018 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5019 #endif 5020 #if defined(PETSC_HAVE_SPOOLES) 5021 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5022 #endif 5023 EXTERN_C_END 5024 5025 /*MC 5026 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5027 5028 Options Database Keys: 5029 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5030 5031 Level: beginner 5032 5033 .seealso: MatCreateMPIAIJ() 5034 M*/ 5035 5036 EXTERN_C_BEGIN 5037 #undef __FUNCT__ 5038 #define __FUNCT__ "MatCreate_MPIAIJ" 5039 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5040 { 5041 Mat_MPIAIJ *b; 5042 PetscErrorCode ierr; 5043 PetscMPIInt size; 5044 5045 PetscFunctionBegin; 5046 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5047 5048 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5049 B->data = (void*)b; 5050 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5051 B->rmap->bs = 1; 5052 B->assembled = PETSC_FALSE; 5053 B->mapping = 0; 5054 5055 B->insertmode = NOT_SET_VALUES; 5056 b->size = size; 5057 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5058 5059 /* build cache for off array entries formed */ 5060 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5061 b->donotstash = PETSC_FALSE; 5062 b->colmap = 0; 5063 b->garray = 0; 5064 b->roworiented = PETSC_TRUE; 5065 5066 /* stuff used for matrix vector multiply */ 5067 b->lvec = PETSC_NULL; 5068 b->Mvctx = PETSC_NULL; 5069 5070 /* stuff for MatGetRow() */ 5071 b->rowindices = 0; 5072 b->rowvalues = 0; 5073 b->getrowactive = PETSC_FALSE; 5074 5075 #if defined(PETSC_HAVE_SPOOLES) 5076 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5077 "MatGetFactor_mpiaij_spooles", 5078 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5079 #endif 5080 #if defined(PETSC_HAVE_MUMPS) 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5082 "MatGetFactor_aij_mumps", 5083 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5084 #endif 5085 #if defined(PETSC_HAVE_PASTIX) 5086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5087 "MatGetFactor_mpiaij_pastix", 5088 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5089 #endif 5090 #if defined(PETSC_HAVE_SUPERLU_DIST) 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5092 "MatGetFactor_mpiaij_superlu_dist", 5093 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5094 #endif 5095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5096 "MatStoreValues_MPIAIJ", 5097 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5098 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5099 "MatRetrieveValues_MPIAIJ", 5100 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5102 "MatGetDiagonalBlock_MPIAIJ", 5103 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5105 "MatIsTranspose_MPIAIJ", 5106 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5108 "MatMPIAIJSetPreallocation_MPIAIJ", 5109 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5110 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5111 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5112 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5113 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5114 "MatDiagonalScaleLocal_MPIAIJ", 5115 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5116 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5117 "MatConvert_MPIAIJ_MPICSRPERM", 5118 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5120 "MatConvert_MPIAIJ_MPICRL", 5121 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5123 "MatConvert_MPIAIJ_MPISBAIJ", 5124 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5126 "MatMatMult_MPIDense_MPIAIJ", 5127 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5129 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5130 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5132 "MatMatMultNumeric_MPIDense_MPIAIJ", 5133 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5134 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5135 PetscFunctionReturn(0); 5136 } 5137 EXTERN_C_END 5138 5139 #undef __FUNCT__ 5140 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5141 /*@ 5142 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5143 and "off-diagonal" part of the matrix in CSR format. 5144 5145 Collective on MPI_Comm 5146 5147 Input Parameters: 5148 + comm - MPI communicator 5149 . m - number of local rows (Cannot be PETSC_DECIDE) 5150 . n - This value should be the same as the local size used in creating the 5151 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5152 calculated if N is given) For square matrices n is almost always m. 5153 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5154 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5155 . i - row indices for "diagonal" portion of matrix 5156 . j - column indices 5157 . a - matrix values 5158 . oi - row indices for "off-diagonal" portion of matrix 5159 . oj - column indices 5160 - oa - matrix values 5161 5162 Output Parameter: 5163 . mat - the matrix 5164 5165 Level: advanced 5166 5167 Notes: 5168 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5169 5170 The i and j indices are 0 based 5171 5172 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5173 5174 This sets local rows and cannot be used to set off-processor values. 5175 5176 You cannot later use MatSetValues() to change values in this matrix. 
5177 5178 .keywords: matrix, aij, compressed row, sparse, parallel 5179 5180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5181 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5182 @*/ 5183 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5184 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5185 { 5186 PetscErrorCode ierr; 5187 Mat_MPIAIJ *maij; 5188 5189 PetscFunctionBegin; 5190 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5191 if (i[0]) { 5192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5193 } 5194 if (oi[0]) { 5195 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5196 } 5197 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5198 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5199 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5200 maij = (Mat_MPIAIJ*) (*mat)->data; 5201 maij->donotstash = PETSC_TRUE; 5202 (*mat)->preallocated = PETSC_TRUE; 5203 5204 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5205 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5206 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5207 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5208 5209 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5210 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5211 5212 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5213 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5214 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5215 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5216 5217 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5218 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5219 PetscFunctionReturn(0); 5220 } 5221 5222 /* 5223 Special version for direct calls from Fortran 5224 */ 5225 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5226 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5227 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5228 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5229 #endif 5230 5231 /* Change these macros so can be used in void function */ 5232 #undef CHKERRQ 5233 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5234 #undef SETERRQ2 5235 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5236 #undef SETERRQ 5237 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5238 5239 EXTERN_C_BEGIN 5240 #undef __FUNCT__ 5241 #define __FUNCT__ "matsetvaluesmpiaij_" 5242 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5243 { 5244 Mat mat = *mmat; 5245 PetscInt m = *mm, n = *mn; 5246 InsertMode addv = *maddv; 5247 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5248 PetscScalar value; 5249 PetscErrorCode ierr; 5250 5251 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5252 if (mat->insertmode == NOT_SET_VALUES) { 5253 mat->insertmode = addv; 5254 } 5255 #if defined(PETSC_USE_DEBUG) 5256 else if (mat->insertmode != addv) { 5257 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5258 } 5259 #endif 5260 { 5261 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5262 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5263 PetscTruth roworiented = aij->roworiented; 5264 5265 /* Some Variables required in the macro */ 5266 Mat A = aij->A; 5267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5268 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5269 MatScalar *aa = a->a; 5270 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5271 Mat B = aij->B; 5272 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5273 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5274 MatScalar *ba = b->a; 5275 5276 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5277 PetscInt nonew = a->nonew; 5278 MatScalar *ap1,*ap2; 5279 5280 PetscFunctionBegin; 5281 for (i=0; i<m; i++) { 5282 if (im[i] < 0) continue; 5283 #if defined(PETSC_USE_DEBUG) 5284 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5285 #endif 5286 if (im[i] >= rstart && im[i] < rend) { 5287 row = im[i] - rstart; 5288 lastcol1 = -1; 5289 rp1 = aj + ai[row]; 5290 ap1 = aa + ai[row]; 5291 rmax1 = aimax[row]; 5292 nrow1 = ailen[row]; 5293 low1 = 0; 5294 high1 = nrow1; 5295 lastcol2 = -1; 5296 rp2 = bj + bi[row]; 5297 ap2 = ba + bi[row]; 5298 rmax2 = bimax[row]; 5299 nrow2 = bilen[row]; 5300 low2 = 0; 5301 high2 = nrow2; 5302 5303 for (j=0; j<n; j++) { 5304 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5305 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5306 if (in[j] >= cstart && in[j] < cend){ 5307 col = in[j] - cstart; 5308 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5309 } else if (in[j] < 0) continue; 5310 #if defined(PETSC_USE_DEBUG) 5311 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5312 #endif 5313 else { 5314 if (mat->was_assembled) { 5315 if (!aij->colmap) { 5316 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5317 } 5318 #if defined (PETSC_USE_CTABLE) 5319 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5320 col--; 5321 #else 5322 col = aij->colmap[in[j]] - 1; 5323 #endif 5324 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5325 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5326 col = in[j]; 5327 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5328 B = aij->B; 5329 b = (Mat_SeqAIJ*)B->data; 5330 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5331 rp2 = bj + bi[row]; 5332 ap2 = ba + bi[row]; 5333 rmax2 = bimax[row]; 5334 nrow2 = bilen[row]; 5335 low2 = 0; 5336 high2 = nrow2; 5337 bm = aij->B->rmap->n; 5338 ba = b->a; 5339 } 5340 } else col = in[j]; 5341 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5342 } 5343 } 5344 } else { 5345 if (!aij->donotstash) { 5346 if (roworiented) { 5347 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5348 } else { 5349 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5350 } 5351 } 5352 } 5353 }} 5354 PetscFunctionReturnVoid(); 5355 } 5356 EXTERN_C_END 5357 5358
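/*
   Illustrative sketch only (A, row, col, val are placeholder names): Fortran code that
   knows its matrix is of type MPIAIJ can call the stub above directly as

     call MatSetValuesMPIAIJ(A,1,row,1,col,val,ADD_VALUES,ierr)

   which resolves to matsetvaluesmpiaij_() through the name-mangling macros above and
   bypasses the generic MatSetValues() dispatch; the semantics are otherwise those of
   MatSetValues().
*/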