#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
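/*
   Usage sketch for the routine above (illustrative only, not part of PETSc):
   process 0 builds a sequential AIJ matrix and MatDistribute_MPIAIJ() spreads
   it across the communicator; every rank passes its local row count m.

     Mat gseq = PETSC_NULL,mpimat;
     if (!rank) {
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,0,lens,&gseq);CHKERRQ(ierr);  // lens: hypothetical row-length array
       // ... fill gseq with MatSetValues() and assemble it ...
     }
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are shipped from process 0;
   the parallel nonzero structure of *inmat is reused unchanged.
*/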
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
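/*
   What the two macros above implement, as a plain sketch (illustrative, not
   compiled): a bounded binary search over the sorted column indices of one
   row, finished by a short linear scan, followed by shift-and-insert when
   the column is new.

     lo = 0; hi = nrow;
     while (hi - lo > 5) {                        // binary search phase
       t = (lo + hi)/2;
       if (rp[t] > col) hi = t; else lo = t;
     }
     for (i = lo; i < hi; i++) {                  // linear finish
       if (rp[i] == col) { ap[i] = value; return; }  // overwrite or add
       if (rp[i] >  col) break;                      // insertion point found
     }
     memmove(rp+i+1,rp+i,(nrow-i)*sizeof(*rp));   // shift tail up one slot
     memmove(ap+i+1,ap+i,(nrow-i)*sizeof(*ap));
     rp[i] = col; ap[i] = value; nrow++;

   The lastcol bookkeeping narrows the search window when columns arrive in
   ascending order, the common case when callers set one row at a time.
*/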
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
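/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above, shown for one local row
   of a square MPIAIJ matrix (illustrative): the caller supplies the full row
   in ascending global column order, which splits into three contiguous pieces

      v = [ B part left of diagonal block | A part (diagonal block) | B part right ]

   For example, a row with global columns 1 3 | 4 6 | 9 on a process owning
   columns 4..7 gives l = 2 leading values for B, the next
   a->i[row+1]-a->i[row] values for A, and the remainder returns to B.
*/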
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
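/*
   Usage note for the routine above (illustrative): values for rows this
   process does not own are legal; they are parked in mat->stash and only
   shipped to their owners during assembly, so every MatSetValues() phase
   must end with the assembly pair:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);  // grow may be off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatSetOption(mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) turns the stash path
   into an error, which is a cheap way to verify a purely local assembly.
*/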
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
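/*
   Note on the MPI_BOR reduction above (explanatory; assumes, as in this
   PETSc version, that INSERT_VALUES and ADD_VALUES occupy distinct low bits
   of the InsertMode enum): OR-ing everyone's mode together yields one of
   the two legal values when all processes agree, and the bitwise union
   (ADD_VALUES|INSERT_VALUES) exactly when they disagree, which is what the
   error test catches. A rank that set no values contributes the zero mode
   and is absorbed harmlessly by the OR.
*/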
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
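/*
   Usage sketch for the routine above (illustrative): the classic Dirichlet
   boundary-condition idiom, zeroing the rows of the boundary unknowns,
   placing 1.0 on the diagonal, and fixing the right-hand side from a vector
   of prescribed boundary values.

     PetscInt bc_rows[] = {0,5,17};            // hypothetical global row numbers
     ierr = MatZeroRows(A,3,bc_rows,1.0,x,b);CHKERRQ(ierr);

   Rows owned by other processes are legal; they are routed to their owners
   with the Irecv/Isend rendezvous above, unless
   MatSetOption(A,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) promised only local rows.
*/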
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(xmask);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
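  /*
     Note on the two branches below (explanatory): in compressed-row storage
     only the nonempty local rows of B are kept, so compressedrow.i has
     compressedrow.nrows+1 entries and compressedrow.rindex maps each stored
     row back to its true local row number. For example, if only local rows
     2 and 7 of B have off-diagonal entries then nrows = 2 and
     rindex = {2,7}, and bb must be updated through *ridx rather than
     through the loop index i.
  */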
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }

  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
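/*
   The pattern in MatMult_MPIAIJ() above is the standard overlap of
   communication with computation: with the product split as
   y = A_diag*x_local + B_offdiag*x_ghost, the ghost-value scatter is started,
   the diagonal block is multiplied while messages are in flight, and only
   then does the off-diagonal multiply-add consume the arrived ghost values
   in a->lvec. MatMultAdd_MPIAIJ() below follows the same schedule.
*/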
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
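/*
   Transpose products reverse the data flow (explanatory note): the
   off-diagonal contribution B^T*x computed on this process belongs to
   *other* processes' rows, so it is formed first in a->lvec and then pushed
   back with a SCATTER_REVERSE that ADD_VALUES-accumulates into yy, i.e.

      y = A_diag^T * x_local  +  scatter_reverse_add( B_offdiag^T * x_local )

   MatMultTransposeAdd_MPIAIJ() below uses the same structure.
*/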
"MatGetDiagonal_MPIAIJ" 1069 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1070 { 1071 PetscErrorCode ierr; 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1073 1074 PetscFunctionBegin; 1075 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1076 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1077 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatScale_MPIAIJ" 1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1084 { 1085 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1090 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatDestroy_MPIAIJ" 1096 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1097 { 1098 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 #if defined(PETSC_USE_LOG) 1103 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1104 #endif 1105 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1106 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 1107 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 1108 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 1109 #if defined (PETSC_USE_CTABLE) 1110 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 1111 #else 1112 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1113 #endif 1114 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1115 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 1116 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 1117 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1118 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1119 ierr = PetscFree(aij);CHKERRQ(ierr); 1120 1121 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1135 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1136 { 1137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1138 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1139 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1140 PetscErrorCode ierr; 1141 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1142 int fd; 1143 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1144 PetscInt 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
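/*
   Resulting file layout (recoverable from the writes above): a 4-entry
   header [MAT_FILE_CLASSID, M, N, total nnz], then the M row lengths, then
   all column indices in global numbering, then all values, each section
   written row by row in process order. The FlowControl calls throttle how
   many workers message process 0 at once, so process 0 never needs more
   than one remote buffer (nzmax entries) at a time.
*/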
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
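/*
   The SOR below is processor-local (explanatory note): each sweep first
   refreshes the ghost values of xx, folds the off-diagonal coupling into
   the right-hand side as bb1 = bb - B*x_ghost, and then runs the sequential
   SOR kernel on the diagonal block only. Globally this is a block-Jacobi
   iteration with SOR inside each block, not a true parallel SOR ordering,
   which is why the plain parallel flags fall through to the "not supported"
   error at the bottom.
*/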
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS |
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* bb1 = bb + B*x + ((omega-2)/omega) D*x */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is much easier:
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr =
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1589 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1590 } 1591 /* now we just get the submatrix */ 1592 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1593 /* clean up */ 1594 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1595 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1601 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1602 { 1603 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1604 Mat A = mat->A,B = mat->B; 1605 PetscErrorCode ierr; 1606 PetscReal isend[5],irecv[5]; 1607 1608 PetscFunctionBegin; 1609 info->block_size = 1.0; 1610 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1611 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1612 isend[3] = info->memory; isend[4] = info->mallocs; 1613 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1614 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1615 isend[3] += info->memory; isend[4] += info->mallocs; 1616 if (flag == MAT_LOCAL) { 1617 info->nz_used = isend[0]; 1618 info->nz_allocated = isend[1]; 1619 info->nz_unneeded = isend[2]; 1620 info->memory = isend[3]; 1621 info->mallocs = isend[4]; 1622 } else if (flag == MAT_GLOBAL_MAX) { 1623 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1624 info->nz_used = irecv[0]; 1625 info->nz_allocated = irecv[1]; 1626 info->nz_unneeded = irecv[2]; 1627 info->memory = irecv[3]; 1628 info->mallocs = irecv[4]; 1629 } else if (flag == MAT_GLOBAL_SUM) { 1630 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1631 info->nz_used = irecv[0]; 1632 info->nz_allocated = irecv[1]; 1633 info->nz_unneeded = irecv[2]; 1634 info->memory = irecv[3]; 1635 info->mallocs = irecv[4]; 1636 } 1637 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1638 info->fill_ratio_needed = 0; 1639 info->factor_mallocs = 0; 1640 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatSetOption_MPIAIJ" 1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1647 { 1648 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1649 PetscErrorCode ierr; 1650 1651 PetscFunctionBegin; 1652 switch (op) { 1653 case MAT_NEW_NONZERO_LOCATIONS: 1654 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1655 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1656 case MAT_KEEP_NONZERO_PATTERN: 1657 case MAT_NEW_NONZERO_LOCATION_ERR: 1658 case MAT_USE_INODES: 1659 case MAT_IGNORE_ZERO_ENTRIES: 1660 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1661 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1662 break; 1663 case MAT_ROW_ORIENTED: 1664 a->roworiented = flg; 1665 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1666 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1667 break; 1668 case MAT_NEW_DIAGONALS: 1669 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1670 break; 1671 case MAT_IGNORE_OFF_PROC_ENTRIES: 1672 a->donotstash = PETSC_TRUE; 1673 break; 1674 case MAT_SPD: 1675 A->spd_set = PETSC_TRUE; 1676 A->spd = flg; 1677 if (flg) { 1678 A->symmetric = PETSC_TRUE; 1679 A->structurally_symmetric = PETSC_TRUE; 1680 A->symmetric_set = PETSC_TRUE; 1681 A->structurally_symmetric_set = PETSC_TRUE; 1682 } 1683 break; 1684 case MAT_SYMMETRIC: 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 break; 1687 case 
MAT_STRUCTURALLY_SYMMETRIC: 1688 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1689 break; 1690 case MAT_HERMITIAN: 1691 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1692 break; 1693 case MAT_SYMMETRY_ETERNAL: 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 break; 1696 default: 1697 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1698 } 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatGetRow_MPIAIJ" 1704 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1705 { 1706 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1707 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1708 PetscErrorCode ierr; 1709 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1710 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1711 PetscInt *cmap,*idx_p; 1712 1713 PetscFunctionBegin; 1714 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1715 mat->getrowactive = PETSC_TRUE; 1716 1717 if (!mat->rowvalues && (idx || v)) { 1718 /* 1719 allocate enough space to hold information from the longest row. 1720 */ 1721 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1722 PetscInt max = 1,tmp; 1723 for (i=0; i<matin->rmap->n; i++) { 1724 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1725 if (max < tmp) { max = tmp; } 1726 } 1727 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1728 } 1729 1730 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1731 lrow = row - rstart; 1732 1733 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1734 if (!v) {pvA = 0; pvB = 0;} 1735 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1736 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1737 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1738 nztot = nzA + nzB; 1739 1740 cmap = mat->garray; 1741 if (v || idx) { 1742 if (nztot) { 1743 /* Sort by increasing column numbers, assuming A and B already sorted */ 1744 PetscInt imark = -1; 1745 if (v) { 1746 *v = v_p = mat->rowvalues; 1747 for (i=0; i<nzB; i++) { 1748 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1749 else break; 1750 } 1751 imark = i; 1752 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1753 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1754 } 1755 if (idx) { 1756 *idx = idx_p = mat->rowindices; 1757 if (imark > -1) { 1758 for (i=0; i<imark; i++) { 1759 idx_p[i] = cmap[cworkB[i]]; 1760 } 1761 } else { 1762 for (i=0; i<nzB; i++) { 1763 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1764 else break; 1765 } 1766 imark = i; 1767 } 1768 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1769 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1770 } 1771 } else { 1772 if (idx) *idx = 0; 1773 if (v) *v = 0; 1774 } 1775 } 1776 *nz = nztot; 1777 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1778 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1785 { 1786 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1787 1788 PetscFunctionBegin; 1789 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1790 aij->getrowactive = PETSC_FALSE; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatNorm_MPIAIJ" 1796 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1800 PetscErrorCode ierr; 1801 PetscInt i,j,cstart = mat->cmap->rstart; 1802 PetscReal sum = 0.0; 1803 MatScalar *v; 1804 1805 PetscFunctionBegin; 1806 if (aij->size == 1) { 1807 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1808 } else { 1809 if (type == NORM_FROBENIUS) { 1810 v = amat->a; 1811 for (i=0; i<amat->nz; i++) { 1812 #if defined(PETSC_USE_COMPLEX) 1813 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1814 #else 1815 sum += (*v)*(*v); v++; 1816 #endif 1817 } 1818 v = bmat->a; 1819 for (i=0; i<bmat->nz; i++) { 1820 #if defined(PETSC_USE_COMPLEX) 1821 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1822 #else 1823 sum += (*v)*(*v); v++; 1824 #endif 1825 } 1826 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1827 *norm = sqrt(*norm); 1828 } else if (type == NORM_1) { /* max column norm */ 1829 PetscReal *tmp,*tmp2; 1830 PetscInt *jj,*garray = aij->garray; 1831 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1832 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1833 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1834 *norm = 0.0; 1835 v = amat->a; jj = amat->j; 1836 for (j=0; j<amat->nz; j++) { 1837 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1838 } 1839 v = bmat->a; jj = bmat->j; 1840 for (j=0; j<bmat->nz; j++) { 1841 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1842 } 1843 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1844 for (j=0; j<mat->cmap->N; j++) { 1845 if (tmp2[j] > *norm) *norm = tmp2[j]; 1846 } 1847 ierr = PetscFree(tmp);CHKERRQ(ierr); 1848 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1849 } else if (type == NORM_INFINITY) { /* max row norm */ 1850 PetscReal ntemp = 0.0; 1851 for (j=0; j<aij->A->rmap->n; j++) { 1852 v = amat->a + amat->i[j]; 1853 sum = 0.0; 1854 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1855 sum += PetscAbsScalar(*v); v++; 1856 } 1857 v = bmat->a + bmat->i[j]; 1858 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1859 sum += PetscAbsScalar(*v); v++; 1860 } 1861 if (sum > ntemp) ntemp = sum; 1862 } 1863 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1864 } else { 1865 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1866 } 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatTranspose_MPIAIJ" 1873 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1874 { 1875 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1876 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1877 PetscErrorCode ierr; 1878 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1879 PetscInt cstart=A->cmap->rstart,ncol; 1880 Mat B; 1881 MatScalar *array; 1882 1883 PetscFunctionBegin; 1884 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1885 1886 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1887 ai = Aloc->i; aj = Aloc->j; 1888 bi = Bloc->i; bj = Bloc->j; 1889 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1890 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1891 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1892 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1893 for (i=0; i<ai[ma]; i++){ 1894 d_nnz[aj[i]] ++; 1895 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1896 } 1897 1898 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1899 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1900 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1901 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1902 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1903 } else { 1904 B = *matout; 1905 } 1906 1907 /* copy over the A part */ 1908 array = Aloc->a; 1909 row = A->rmap->rstart; 1910 for (i=0; i<ma; i++) { 1911 ncol = ai[i+1]-ai[i]; 1912 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1913 row++; array += ncol; aj += ncol; 1914 } 1915 aj = Aloc->j; 1916 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1917 1918 /* copy over the B part */ 1919 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1920 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1921 array = Bloc->a; 1922 row = A->rmap->rstart; 1923 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1924 cols_tmp = cols; 1925 for (i=0; i<mb; i++) { 1926 ncol = bi[i+1]-bi[i]; 1927 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1928 row++; array += ncol; cols_tmp += ncol; 1929 } 1930 ierr = PetscFree(cols);CHKERRQ(ierr); 1931 1932 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1934 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1935 *matout = B; 1936 } else { 1937 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1944 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1945 { 1946 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1947 Mat a = aij->A,b = aij->B; 1948 PetscErrorCode ierr; 1949 PetscInt s1,s2,s3; 1950 1951 PetscFunctionBegin; 1952 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1953 if (rr) { 1954 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1955 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1956 /* Overlap communication with computation. 
*/ 1957 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 } 1959 if (ll) { 1960 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1961 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1962 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1963 } 1964 /* scale the diagonal block */ 1965 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1966 1967 if (rr) { 1968 /* Do a scatter end and then right scale the off-diagonal block */ 1969 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1971 } 1972 1973 PetscFunctionReturn(0); 1974 } 1975 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1978 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1979 { 1980 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1985 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1986 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1987 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1988 PetscFunctionReturn(0); 1989 } 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1993 { 1994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatEqual_MPIAIJ" 2004 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2005 { 2006 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2007 Mat a,b,c,d; 2008 PetscBool flg; 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 a = matA->A; b = matA->B; 2013 c = matB->A; d = matB->B; 2014 2015 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2016 if (flg) { 2017 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2018 } 2019 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatCopy_MPIAIJ" 2025 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2026 { 2027 PetscErrorCode ierr; 2028 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2029 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2030 2031 PetscFunctionBegin; 2032 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2033 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2034 /* because of the column compression in the off-processor part of the matrix a->B, 2035 the number of columns in a->B and b->B may be different, hence we cannot call 2036 the MatCopy() directly on the two parts. 
If need be, we can provide a more efficient copy than MatCopy_Basic() by
       first uncompressing the a->B matrices and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X form a subset of the nonzeros of Y */
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
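/*
   Illustrative sketch (hypothetical helper, not part of the original file): callers
   reach MatAXPY_MPIAIJ() above through the public MatAXPY() interface.  The cheap
   BLAS path is taken only when the caller can promise SAME_NONZERO_PATTERN;
   DIFFERENT_NONZERO_PATTERN falls through to the preallocate-and-rebuild branch.
*/
static PetscErrorCode MatAXPYSameNonzeroPattern_Example(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Y <- Y + 2*X, assuming X and Y were assembled with identical nonzero patterns */
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}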
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* attach the level graph to the factored matrix so MatSolve_MPIAIJ() can find it */
  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
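/*
   Illustrative sketch (hypothetical helper, not part of the original file): the
   factorization above stashes its graph on the factored matrix with a
   PetscContainer.  Anything composed this way can later be recovered through
   PetscObjectQuery()/PetscContainerGetPointer(), as MatSolve_MPIAIJ() below does:
*/
static PetscErrorCode MatGetComposedGraph_Example(Mat F,void **graph)
{
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)F,"graph",(PetscObject*)&c);CHKERRQ(ierr);
  if (!c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"No graph composed with this matrix");
  ierr = PetscContainerGetPointer(c,graph);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}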
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type*   lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar* array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar* array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
                 ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
               vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);

  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscMPIInt    *send_rank,*recv_rank;
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*MatDestroy)(Mat);
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  }
  /* put back the original destroy routine, detach the container, then destroy */
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
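/*
   Illustrative sketch (hypothetical caller, not part of the original file): the
   Mat_Redundant struct above caches the communication pattern so that a second
   call through the public MatGetRedundantMatrix() interface (assumed to have the
   same argument list as the implementation below) with MAT_REUSE_MATRIX resends
   only the numerical values:
*/
static PetscErrorCode MatGetRedundantMatrixReuse_Example(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* first call builds the redundant matrix and attaches the Mat_Redundant cache */
  ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
  /* ... the caller changes entries of mat without changing its nonzero pattern ... */
  ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}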
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2371 2372 nsends = redund->nsends; 2373 nrecvs = redund->nrecvs; 2374 send_rank = redund->send_rank; 2375 recv_rank = redund->recv_rank; 2376 sbuf_nz = redund->sbuf_nz; 2377 rbuf_nz = redund->rbuf_nz; 2378 sbuf_j = redund->sbuf_j; 2379 sbuf_a = redund->sbuf_a; 2380 rbuf_j = redund->rbuf_j; 2381 rbuf_a = redund->rbuf_a; 2382 } 2383 2384 if (reuse == MAT_INITIAL_MATRIX){ 2385 PetscMPIInt subrank,subsize; 2386 PetscInt nleftover,np_subcomm; 2387 /* get the destination processors' id send_rank, nsends and nrecvs */ 2388 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2389 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2390 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2391 np_subcomm = size/nsubcomm; 2392 nleftover = size - nsubcomm*np_subcomm; 2393 nsends = 0; nrecvs = 0; 2394 for (i=0; i<size; i++){ /* i=rank*/ 2395 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2396 send_rank[nsends] = i; nsends++; 2397 recv_rank[nrecvs++] = i; 2398 } 2399 } 2400 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2401 i = size-nleftover-1; 2402 j = 0; 2403 while (j < nsubcomm - nleftover){ 2404 send_rank[nsends++] = i; 2405 i--; j++; 2406 } 2407 } 2408 2409 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2410 for (i=0; i<nleftover; i++){ 2411 recv_rank[nrecvs++] = size-nleftover+i; 2412 } 2413 } 2414 2415 /* allocate sbuf_j, sbuf_a */ 2416 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2417 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2418 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2419 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2420 2421 /* copy mat's local entries into the buffers */ 2422 if (reuse == MAT_INITIAL_MATRIX){ 2423 rownz_max = 0; 2424 rptr = sbuf_j; 2425 cols = sbuf_j + rend-rstart + 1; 2426 vals = sbuf_a; 2427 rptr[0] = 0; 2428 for (i=0; i<rend-rstart; i++){ 2429 row = i + rstart; 2430 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2431 ncols = nzA + nzB; 2432 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2433 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2434 /* load the column indices for this row into cols */ 2435 lwrite = 0; 2436 for (l=0; l<nzB; l++) { 2437 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2438 vals[lwrite] = aworkB[l]; 2439 cols[lwrite++] = ctmp; 2440 } 2441 } 2442 for (l=0; l<nzA; l++){ 2443 vals[lwrite] = aworkA[l]; 2444 cols[lwrite++] = cstart + cworkA[l]; 2445 } 2446 for (l=0; l<nzB; l++) { 2447 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2448 vals[lwrite] = aworkB[l]; 2449 cols[lwrite++] = ctmp; 2450 } 2451 } 2452 vals += ncols; 2453 cols += ncols; 2454 rptr[i+1] = rptr[i] + ncols; 2455 if (rownz_max < ncols) rownz_max = ncols; 2456 } 2457 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2458 } else { /* only copy matrix values into sbuf_a */ 2459 rptr = sbuf_j; 2460 vals = sbuf_a; 2461 rptr[0] = 0; 2462 for (i=0; i<rend-rstart; i++){ 2463 row = i + rstart; 2464 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2465 ncols = nzA + nzB; 2466 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2467 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2468 lwrite = 0; 2469 for (l=0; l<nzB; l++) { 2470 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2471 } 2472 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2473 for 
(l=0; l<nzB; l++) { 2474 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2475 } 2476 vals += ncols; 2477 rptr[i+1] = rptr[i] + ncols; 2478 } 2479 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2480 2481 /* send nzlocal to others, and recv other's nzlocal */ 2482 /*--------------------------------------------------*/ 2483 if (reuse == MAT_INITIAL_MATRIX){ 2484 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2485 s_waits2 = s_waits3 + nsends; 2486 s_waits1 = s_waits2 + nsends; 2487 r_waits1 = s_waits1 + nsends; 2488 r_waits2 = r_waits1 + nrecvs; 2489 r_waits3 = r_waits2 + nrecvs; 2490 } else { 2491 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2492 r_waits3 = s_waits3 + nsends; 2493 } 2494 2495 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2496 if (reuse == MAT_INITIAL_MATRIX){ 2497 /* get new tags to keep the communication clean */ 2498 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2499 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2500 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2501 2502 /* post receives of other's nzlocal */ 2503 for (i=0; i<nrecvs; i++){ 2504 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2505 } 2506 /* send nzlocal to others */ 2507 for (i=0; i<nsends; i++){ 2508 sbuf_nz[i] = nzlocal; 2509 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2510 } 2511 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2512 count = nrecvs; 2513 while (count) { 2514 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2515 recv_rank[imdex] = recv_status.MPI_SOURCE; 2516 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2517 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2518 2519 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2520 rbuf_nz[imdex] += i + 2; 2521 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2522 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2523 count--; 2524 } 2525 /* wait on sends of nzlocal */ 2526 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2527 /* send mat->i,j to others, and recv from other's */ 2528 /*------------------------------------------------*/ 2529 for (i=0; i<nsends; i++){ 2530 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2531 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2532 } 2533 /* wait on receives of mat->i,j */ 2534 /*------------------------------*/ 2535 count = nrecvs; 2536 while (count) { 2537 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2538 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2539 count--; 2540 } 2541 /* wait on sends of mat->i,j */ 2542 /*---------------------------*/ 2543 if (nsends) { 2544 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2545 } 2546 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2547 2548 /* post receives, send and receive mat->a */ 2549 
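/* Values are resent on every call: MAT_REUSE_MATRIX skips the index exchange
     above but still performs this numerical-value exchange.  Receives are posted
     before the matching MPI_Isend()s so the sends cannot block. */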
/*----------------------------------------*/ 2550 for (imdex=0; imdex<nrecvs; imdex++) { 2551 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2552 } 2553 for (i=0; i<nsends; i++){ 2554 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2555 } 2556 count = nrecvs; 2557 while (count) { 2558 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2559 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2560 count--; 2561 } 2562 if (nsends) { 2563 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2564 } 2565 2566 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2567 2568 /* create redundant matrix */ 2569 /*-------------------------*/ 2570 if (reuse == MAT_INITIAL_MATRIX){ 2571 /* compute rownz_max for preallocation */ 2572 for (imdex=0; imdex<nrecvs; imdex++){ 2573 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2574 rptr = rbuf_j[imdex]; 2575 for (i=0; i<j; i++){ 2576 ncols = rptr[i+1] - rptr[i]; 2577 if (rownz_max < ncols) rownz_max = ncols; 2578 } 2579 } 2580 2581 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2582 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2583 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2584 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2585 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2586 } else { 2587 C = *matredundant; 2588 } 2589 2590 /* insert local matrix entries */ 2591 rptr = sbuf_j; 2592 cols = sbuf_j + rend-rstart + 1; 2593 vals = sbuf_a; 2594 for (i=0; i<rend-rstart; i++){ 2595 row = i + rstart; 2596 ncols = rptr[i+1] - rptr[i]; 2597 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2598 vals += ncols; 2599 cols += ncols; 2600 } 2601 /* insert received matrix entries */ 2602 for (imdex=0; imdex<nrecvs; imdex++){ 2603 rstart = rowrange[recv_rank[imdex]]; 2604 rend = rowrange[recv_rank[imdex]+1]; 2605 rptr = rbuf_j[imdex]; 2606 cols = rbuf_j[imdex] + rend-rstart + 1; 2607 vals = rbuf_a[imdex]; 2608 for (i=0; i<rend-rstart; i++){ 2609 row = i + rstart; 2610 ncols = rptr[i+1] - rptr[i]; 2611 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2612 vals += ncols; 2613 cols += ncols; 2614 } 2615 } 2616 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2617 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2618 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2619 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2620 if (reuse == MAT_INITIAL_MATRIX){ 2621 PetscContainer container; 2622 *matredundant = C; 2623 /* create a supporting struct and attach it to C for reuse */ 2624 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2625 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2626 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2627 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2628 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2629 2630 redund->nzlocal = nzlocal; 2631 redund->nsends = nsends; 2632 redund->nrecvs = nrecvs; 2633 redund->send_rank = send_rank; 2634 redund->recv_rank = recv_rank; 
redund->sbuf_nz = sbuf_nz;
    redund->rbuf_nz = rbuf_nz;
    redund->sbuf_j  = sbuf_j;
    redund->sbuf_a  = sbuf_a;
    redund->rbuf_j  = rbuf_j;
    redund->rbuf_a  = rbuf_a;

    /* intercept the destroy routine so the cached buffers are freed along with C */
    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n,
&offdiagV);CHKERRQ(ierr); 2750 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2751 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2752 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2753 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2754 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2755 for(r = 0; r < n; ++r) { 2756 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2757 a[r] = diagA[r]; 2758 idx[r] = cstart + diagIdx[r]; 2759 } else { 2760 a[r] = offdiagA[r]; 2761 idx[r] = cmap[offdiagIdx[r]]; 2762 } 2763 } 2764 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2765 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2766 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2767 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2768 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2769 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2775 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2776 { 2777 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2778 PetscInt n = A->rmap->n; 2779 PetscInt cstart = A->cmap->rstart; 2780 PetscInt *cmap = mat->garray; 2781 PetscInt *diagIdx, *offdiagIdx; 2782 Vec diagV, offdiagV; 2783 PetscScalar *a, *diagA, *offdiagA; 2784 PetscInt r; 2785 PetscErrorCode ierr; 2786 2787 PetscFunctionBegin; 2788 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2789 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2790 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2791 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2792 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2793 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2794 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2795 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2796 for(r = 0; r < n; ++r) { 2797 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2798 a[r] = diagA[r]; 2799 idx[r] = cstart + diagIdx[r]; 2800 } else { 2801 a[r] = offdiagA[r]; 2802 idx[r] = cmap[offdiagIdx[r]]; 2803 } 2804 } 2805 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2806 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2807 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2808 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2809 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2810 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2816 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2817 { 2818 PetscErrorCode ierr; 2819 Mat *dummy; 2820 2821 PetscFunctionBegin; 2822 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2823 *newmat = *dummy; 2824 ierr = PetscFree(dummy);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2829 /* -------------------------------------------------------------------*/ 2830 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2831 MatGetRow_MPIAIJ, 2832 MatRestoreRow_MPIAIJ, 2833 MatMult_MPIAIJ, 2834 /* 4*/ MatMultAdd_MPIAIJ, 2835 MatMultTranspose_MPIAIJ, 2836 MatMultTransposeAdd_MPIAIJ, 2837 #ifdef PETSC_HAVE_PBGL 2838 MatSolve_MPIAIJ, 2839 #else 2840 0, 2841 #endif 2842 0, 2843 0, 2844 /*10*/ 0, 2845 0, 2846 0, 2847 
MatSOR_MPIAIJ, 2848 MatTranspose_MPIAIJ, 2849 /*15*/ MatGetInfo_MPIAIJ, 2850 MatEqual_MPIAIJ, 2851 MatGetDiagonal_MPIAIJ, 2852 MatDiagonalScale_MPIAIJ, 2853 MatNorm_MPIAIJ, 2854 /*20*/ MatAssemblyBegin_MPIAIJ, 2855 MatAssemblyEnd_MPIAIJ, 2856 MatSetOption_MPIAIJ, 2857 MatZeroEntries_MPIAIJ, 2858 /*24*/ MatZeroRows_MPIAIJ, 2859 0, 2860 #ifdef PETSC_HAVE_PBGL 2861 0, 2862 #else 2863 0, 2864 #endif 2865 0, 2866 0, 2867 /*29*/ MatSetUpPreallocation_MPIAIJ, 2868 #ifdef PETSC_HAVE_PBGL 2869 0, 2870 #else 2871 0, 2872 #endif 2873 0, 2874 0, 2875 0, 2876 /*34*/ MatDuplicate_MPIAIJ, 2877 0, 2878 0, 2879 0, 2880 0, 2881 /*39*/ MatAXPY_MPIAIJ, 2882 MatGetSubMatrices_MPIAIJ, 2883 MatIncreaseOverlap_MPIAIJ, 2884 MatGetValues_MPIAIJ, 2885 MatCopy_MPIAIJ, 2886 /*44*/ MatGetRowMax_MPIAIJ, 2887 MatScale_MPIAIJ, 2888 0, 2889 0, 2890 MatZeroRowsColumns_MPIAIJ, 2891 /*49*/ MatSetBlockSize_MPIAIJ, 2892 0, 2893 0, 2894 0, 2895 0, 2896 /*54*/ MatFDColoringCreate_MPIAIJ, 2897 0, 2898 MatSetUnfactored_MPIAIJ, 2899 MatPermute_MPIAIJ, 2900 0, 2901 /*59*/ MatGetSubMatrix_MPIAIJ, 2902 MatDestroy_MPIAIJ, 2903 MatView_MPIAIJ, 2904 0, 2905 0, 2906 /*64*/ 0, 2907 0, 2908 0, 2909 0, 2910 0, 2911 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2912 MatGetRowMinAbs_MPIAIJ, 2913 0, 2914 MatSetColoring_MPIAIJ, 2915 #if defined(PETSC_HAVE_ADIC) 2916 MatSetValuesAdic_MPIAIJ, 2917 #else 2918 0, 2919 #endif 2920 MatSetValuesAdifor_MPIAIJ, 2921 /*75*/ MatFDColoringApply_AIJ, 2922 0, 2923 0, 2924 0, 2925 0, 2926 /*80*/ 0, 2927 0, 2928 0, 2929 /*83*/ MatLoad_MPIAIJ, 2930 0, 2931 0, 2932 0, 2933 0, 2934 0, 2935 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2936 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2937 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2938 MatPtAP_Basic, 2939 MatPtAPSymbolic_MPIAIJ, 2940 /*94*/ MatPtAPNumeric_MPIAIJ, 2941 0, 2942 0, 2943 0, 2944 0, 2945 /*99*/ 0, 2946 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2947 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2948 MatConjugate_MPIAIJ, 2949 0, 2950 /*104*/MatSetValuesRow_MPIAIJ, 2951 MatRealPart_MPIAIJ, 2952 MatImaginaryPart_MPIAIJ, 2953 0, 2954 0, 2955 /*109*/0, 2956 MatGetRedundantMatrix_MPIAIJ, 2957 MatGetRowMin_MPIAIJ, 2958 0, 2959 0, 2960 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2961 0, 2962 0, 2963 0, 2964 0, 2965 /*119*/0, 2966 0, 2967 0, 2968 0, 2969 MatGetMultiProcBlock_MPIAIJ, 2970 /*124*/0, 2971 0, 2972 0, 2973 0, 2974 MatGetSubMatricesParallel_MPIAIJ 2975 }; 2976 2977 /* ----------------------------------------------------------------------------------------*/ 2978 2979 EXTERN_C_BEGIN 2980 #undef __FUNCT__ 2981 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2982 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2983 { 2984 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2989 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2990 PetscFunctionReturn(0); 2991 } 2992 EXTERN_C_END 2993 2994 EXTERN_C_BEGIN 2995 #undef __FUNCT__ 2996 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2997 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2998 { 2999 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3000 PetscErrorCode ierr; 3001 3002 PetscFunctionBegin; 3003 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3004 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3005 PetscFunctionReturn(0); 3006 } 3007 EXTERN_C_END 3008 3009 EXTERN_C_BEGIN 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3012 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt 
d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3013 { 3014 Mat_MPIAIJ *b; 3015 PetscErrorCode ierr; 3016 PetscInt i; 3017 3018 PetscFunctionBegin; 3019 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3020 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3021 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3022 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3023 3024 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3025 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3026 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3027 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3028 if (d_nnz) { 3029 for (i=0; i<B->rmap->n; i++) { 3030 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3031 } 3032 } 3033 if (o_nnz) { 3034 for (i=0; i<B->rmap->n; i++) { 3035 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3036 } 3037 } 3038 b = (Mat_MPIAIJ*)B->data; 3039 3040 if (!B->preallocated) { 3041 /* Explicitly create 2 MATSEQAIJ matrices. */ 3042 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3043 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3044 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3045 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3046 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3047 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3048 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3049 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3050 } 3051 3052 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3053 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3054 B->preallocated = PETSC_TRUE; 3055 PetscFunctionReturn(0); 3056 } 3057 EXTERN_C_END 3058 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3061 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3062 { 3063 Mat mat; 3064 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3065 PetscErrorCode ierr; 3066 3067 PetscFunctionBegin; 3068 *newmat = 0; 3069 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3070 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3071 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3072 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3073 a = (Mat_MPIAIJ*)mat->data; 3074 3075 mat->factortype = matin->factortype; 3076 mat->rmap->bs = matin->rmap->bs; 3077 mat->assembled = PETSC_TRUE; 3078 mat->insertmode = NOT_SET_VALUES; 3079 mat->preallocated = PETSC_TRUE; 3080 3081 a->size = oldmat->size; 3082 a->rank = oldmat->rank; 3083 a->donotstash = oldmat->donotstash; 3084 a->roworiented = oldmat->roworiented; 3085 a->rowindices = 0; 3086 a->rowvalues = 0; 3087 a->getrowactive = PETSC_FALSE; 3088 3089 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3090 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3091 3092 if (oldmat->colmap) { 3093 #if defined (PETSC_USE_CTABLE) 3094 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3095 #else 3096 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3097 ierr = 
PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3098 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3099 #endif 3100 } else a->colmap = 0; 3101 if (oldmat->garray) { 3102 PetscInt len; 3103 len = oldmat->B->cmap->n; 3104 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3105 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3106 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3107 } else a->garray = 0; 3108 3109 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3110 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3111 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3112 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3113 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3114 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3115 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3116 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3117 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3118 *newmat = mat; 3119 PetscFunctionReturn(0); 3120 } 3121 3122 /* 3123 Allows sending/receiving messages larger than 2 gigabytes in a single call 3124 */ 3125 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 3126 { 3127 int ierr; 3128 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3129 PetscInt i,numchunks; 3130 PetscMPIInt icnt; 3131 3132 numchunks = cnt/CHUNKSIZE + 1; 3133 for (i=0; i<numchunks; i++) { 3134 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3135 ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr); 3136 if (type == MPIU_INT) { 3137 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3138 } else if (type == MPIU_SCALAR) { 3139 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3140 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3141 } 3142 return 0; 3143 } 3144 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 3145 { 3146 int ierr; 3147 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3148 MPI_Status status; 3149 PetscInt i,numchunks; 3150 PetscMPIInt icnt; 3151 3152 numchunks = cnt/CHUNKSIZE + 1; 3153 for (i=0; i<numchunks; i++) { 3154 icnt = PetscMPIIntCast((i < numchunks-1) ?
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3155 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr); 3156 if (type == MPIU_INT) { 3157 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3158 } else if (type == MPIU_SCALAR) { 3159 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3160 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3161 } 3162 return 0; 3163 } 3164 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "MatLoad_MPIAIJ" 3167 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3168 { 3169 PetscScalar *vals,*svals; 3170 MPI_Comm comm = ((PetscObject)viewer)->comm; 3171 PetscErrorCode ierr; 3172 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3173 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3174 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3175 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3176 PetscInt cend,cstart,n,*rowners,sizesset=1; 3177 int fd; 3178 3179 PetscFunctionBegin; 3180 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3181 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3182 if (!rank) { 3183 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3184 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3185 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 3186 } 3187 3188 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3189 3190 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3191 M = header[1]; N = header[2]; 3192 /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */ 3193 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3194 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3195 3196 /* If global sizes are set, check that they are consistent with those given in the file */ 3197 if (sizesset) { 3198 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3199 } 3200 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3201 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3202 3203 /* determine ownership of all rows */ 3204 if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3205 else m = newMat->rmap->n; /* Set by user */ 3206 3207 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3208 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3209 3210 /* First process needs enough room for process with most rows */ 3211 if (!rank) { 3212 mmax = rowners[1]; 3213 for (i=2; i<=size; i++) { 3214 mmax = PetscMax(mmax,rowners[i]); 3215 } 3216 } else mmax = m; 3217 3218 rowners[0] = 0; 3219 for (i=2; i<=size; i++) { 3220 rowners[i] += rowners[i-1]; 3221 } 3222 rstart = rowners[rank]; 3223 rend = rowners[rank+1]; 3224 3225 /* distribute row lengths to all processors */ 3226 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3227 if (!rank) { 3228 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3229 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3230 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3231 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3232 for (j=0;
j<m; j++) { 3233 procsnz[0] += ourlens[j]; 3234 } 3235 for (i=1; i<size; i++) { 3236 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3237 /* calculate the number of nonzeros on each processor */ 3238 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3239 procsnz[i] += rowlengths[j]; 3240 } 3241 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3242 } 3243 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3244 } else { 3245 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3246 } 3247 3248 if (!rank) { 3249 /* determine max buffer needed and allocate it */ 3250 maxnz = 0; 3251 for (i=0; i<size; i++) { 3252 maxnz = PetscMax(maxnz,procsnz[i]); 3253 } 3254 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3255 3256 /* read in my part of the matrix column indices */ 3257 nz = procsnz[0]; 3258 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3259 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3260 3261 /* read in everyone else's part and ship it off */ 3262 for (i=1; i<size; i++) { 3263 nz = procsnz[i]; 3264 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3265 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3266 } 3267 ierr = PetscFree(cols);CHKERRQ(ierr); 3268 } else { 3269 /* determine buffer space needed for message */ 3270 nz = 0; 3271 for (i=0; i<m; i++) { 3272 nz += ourlens[i]; 3273 } 3274 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3275 3276 /* receive message of column indices */ 3277 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3278 } 3279 3280 /* determine column ownership if matrix is not square */ 3281 if (N != M) { 3282 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3283 else n = newMat->cmap->n; 3284 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3285 cstart = cend - n; 3286 } else { 3287 cstart = rstart; 3288 cend = rend; 3289 n = cend - cstart; 3290 } 3291 3292 /* loop over local rows, determining number of off diagonal entries */ 3293 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3294 jj = 0; 3295 for (i=0; i<m; i++) { 3296 for (j=0; j<ourlens[i]; j++) { 3297 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3298 jj++; 3299 } 3300 } 3301 3302 for (i=0; i<m; i++) { 3303 ourlens[i] -= offlens[i]; 3304 } 3305 if (!sizesset) { 3306 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3307 } 3308 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3309 3310 for (i=0; i<m; i++) { 3311 ourlens[i] += offlens[i]; 3312 } 3313 3314 if (!rank) { 3315 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3316 3317 /* read in my part of the matrix numerical values */ 3318 nz = procsnz[0]; 3319 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3320 3321 /* insert into matrix */ 3322 jj = rstart; 3323 smycols = mycols; 3324 svals = vals; 3325 for (i=0; i<m; i++) { 3326 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3327 smycols += ourlens[i]; 3328 svals += ourlens[i]; 3329 jj++; 3330 } 3331 3332 /* read in other processors and ship out */ 3333 for (i=1; i<size; i++) { 3334 nz = procsnz[i]; 3335 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3336 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3337 } 3338 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3339 } else { 3340 /* receive numeric values */ 3341 ierr
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3342 3343 /* receive message of values */ 3344 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3345 3346 /* insert into matrix */ 3347 jj = rstart; 3348 smycols = mycols; 3349 svals = vals; 3350 for (i=0; i<m; i++) { 3351 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3352 smycols += ourlens[i]; 3353 svals += ourlens[i]; 3354 jj++; 3355 } 3356 } 3357 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3358 ierr = PetscFree(vals);CHKERRQ(ierr); 3359 ierr = PetscFree(mycols);CHKERRQ(ierr); 3360 ierr = PetscFree(rowners);CHKERRQ(ierr); 3361 3362 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3363 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3364 PetscFunctionReturn(0); 3365 } 3366 3367 #undef __FUNCT__ 3368 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3369 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3370 { 3371 PetscErrorCode ierr; 3372 IS iscol_local; 3373 PetscInt csize; 3374 3375 PetscFunctionBegin; 3376 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3377 if (call == MAT_REUSE_MATRIX) { 3378 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3379 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3380 } else { 3381 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3382 } 3383 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3384 if (call == MAT_INITIAL_MATRIX) { 3385 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3386 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3387 } 3388 PetscFunctionReturn(0); 3389 } 3390 3391 #undef __FUNCT__ 3392 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3393 /* 3394 Not great since it makes two copies of the submatrix: first a SeqAIJ 3395 locally, and then the final result by concatenating the local matrices. 3396 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3397 3398 Note: This requires a sequential iscol with all indices.
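   A minimal sketch of the expected call pattern, mirroring what
   MatGetSubMatrix_MPIAIJ() does above (the variable names here are
   illustrative only):

      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
      ierr = ISDestroy(iscol_local);CHKERRQ(ierr);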
3399 */ 3400 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3401 { 3402 PetscErrorCode ierr; 3403 PetscMPIInt rank,size; 3404 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3405 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3406 Mat *local,M,Mreuse; 3407 MatScalar *vwork,*aa; 3408 MPI_Comm comm = ((PetscObject)mat)->comm; 3409 Mat_SeqAIJ *aij; 3410 3411 3412 PetscFunctionBegin; 3413 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3414 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3415 3416 if (call == MAT_REUSE_MATRIX) { 3417 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3418 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3419 local = &Mreuse; 3420 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3421 } else { 3422 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3423 Mreuse = *local; 3424 ierr = PetscFree(local);CHKERRQ(ierr); 3425 } 3426 3427 /* 3428 m - number of local rows 3429 n - number of columns (same on all processors) 3430 rstart - first row in new global matrix generated 3431 */ 3432 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3433 if (call == MAT_INITIAL_MATRIX) { 3434 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3435 ii = aij->i; 3436 jj = aij->j; 3437 3438 /* 3439 Determine the number of non-zeros in the diagonal and off-diagonal 3440 portions of the matrix in order to do correct preallocation 3441 */ 3442 3443 /* first get start and end of "diagonal" columns */ 3444 if (csize == PETSC_DECIDE) { 3445 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3446 if (mglobal == n) { /* square matrix */ 3447 nlocal = m; 3448 } else { 3449 nlocal = n/size + ((n % size) > rank); 3450 } 3451 } else { 3452 nlocal = csize; 3453 } 3454 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3455 rstart = rend - nlocal; 3456 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3457 3458 /* next, compute all the lengths */ 3459 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3460 olens = dlens + m; 3461 for (i=0; i<m; i++) { 3462 jend = ii[i+1] - ii[i]; 3463 olen = 0; 3464 dlen = 0; 3465 for (j=0; j<jend; j++) { 3466 if (*jj < rstart || *jj >= rend) olen++; 3467 else dlen++; 3468 jj++; 3469 } 3470 olens[i] = olen; 3471 dlens[i] = dlen; 3472 } 3473 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3474 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3475 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3476 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3477 ierr = PetscFree(dlens);CHKERRQ(ierr); 3478 } else { 3479 PetscInt ml,nl; 3480 3481 M = *newmat; 3482 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3483 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3484 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3485 /* 3486 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3487 rather than the slower MatSetValues(). 
3488 */ 3489 M->was_assembled = PETSC_TRUE; 3490 M->assembled = PETSC_FALSE; 3491 } 3492 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3493 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3494 ii = aij->i; 3495 jj = aij->j; 3496 aa = aij->a; 3497 for (i=0; i<m; i++) { 3498 row = rstart + i; 3499 nz = ii[i+1] - ii[i]; 3500 cwork = jj; jj += nz; 3501 vwork = aa; aa += nz; 3502 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3503 } 3504 3505 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3506 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3507 *newmat = M; 3508 3509 /* save submatrix used in processor for next request */ 3510 if (call == MAT_INITIAL_MATRIX) { 3511 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3512 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3513 } 3514 3515 PetscFunctionReturn(0); 3516 } 3517 3518 EXTERN_C_BEGIN 3519 #undef __FUNCT__ 3520 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3521 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3522 { 3523 PetscInt m,cstart, cend,j,nnz,i,d; 3524 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3525 const PetscInt *JJ; 3526 PetscScalar *values; 3527 PetscErrorCode ierr; 3528 3529 PetscFunctionBegin; 3530 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3531 3532 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3533 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3534 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3535 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3536 m = B->rmap->n; 3537 cstart = B->cmap->rstart; 3538 cend = B->cmap->rend; 3539 rstart = B->rmap->rstart; 3540 3541 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3542 3543 #if defined(PETSC_USE_DEBUG) 3544 for (i=0; i<m; i++) { 3545 nnz = Ii[i+1]- Ii[i]; 3546 JJ = J + Ii[i]; 3547 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3548 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3549 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3550 } 3551 #endif 3552 3553 for (i=0; i<m; i++) { 3554 nnz = Ii[i+1]- Ii[i]; 3555 JJ = J + Ii[i]; 3556 nnz_max = PetscMax(nnz_max,nnz); 3557 d = 0; 3558 for (j=0; j<nnz; j++) { 3559 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3560 } 3561 d_nnz[i] = d; 3562 o_nnz[i] = nnz - d; 3563 } 3564 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3565 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3566 3567 if (v) values = (PetscScalar*)v; 3568 else { 3569 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3570 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3571 } 3572 3573 for (i=0; i<m; i++) { 3574 ii = i + rstart; 3575 nnz = Ii[i+1]- Ii[i]; 3576 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3577 } 3578 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3579 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3580 3581 if (!v) { 3582 ierr = PetscFree(values);CHKERRQ(ierr); 3583 } 3584 PetscFunctionReturn(0); 3585 } 3586 EXTERN_C_END 3587 3588 #undef __FUNCT__ 3589 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3590 /*@ 3591 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3592 (the default parallel PETSc format). 3593 3594 Collective on MPI_Comm 3595 3596 Input Parameters: 3597 + B - the matrix 3598 . i - the indices into j for the start of each local row (starts with zero) 3599 . j - the column indices for each local row (starts with zero) 3600 - v - optional values in the matrix 3601 3602 Level: developer 3603 3604 Notes: 3605 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3606 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3607 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3608 3609 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3610 3611 The format which is used for the sparse matrix input is equivalent to a 3612 row-major ordering, i.e., for the following matrix, the input data expected is 3613 as shown: 3614 3615 1 0 0 3616 2 0 3 P0 3617 ------- 3618 4 5 6 P1 3619 3620 Process0 [P0]: rows_owned=[0,1] 3621 i = {0,1,3} [size = nrow+1 = 2+1] 3622 j = {0,0,2} [size = nz = 3] 3623 v = {1,2,3} [size = nz = 3] 3624 3625 Process1 [P1]: rows_owned=[2] 3626 i = {0,3} [size = nrow+1 = 1+1] 3627 j = {0,1,2} [size = nz = 3] 3628 v = {4,5,6} [size = nz = 3] 3629 3630 .keywords: matrix, aij, compressed row, sparse, parallel 3631 3632 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3633 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3634 @*/ 3635 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3636 { 3637 PetscErrorCode ierr; 3638 3639 PetscFunctionBegin; 3640 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3641 PetscFunctionReturn(0); 3642 } 3643 3644 #undef __FUNCT__ 3645 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3646 /*@C 3647 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3648 (the default parallel PETSc format). For good matrix assembly performance 3649 the user should preallocate the matrix storage by setting the parameters 3650 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3651 performance can be increased by more than a factor of 50. 3652 3653 Collective on MPI_Comm 3654 3655 Input Parameters: 3656 + A - the matrix 3657 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3658 (same value is used for all local rows) 3659 . d_nnz - array containing the number of nonzeros in the various rows of the 3660 DIAGONAL portion of the local submatrix (possibly different for each row) 3661 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3662 The size of this array is equal to the number of local rows, i.e. 'm'. 3663 You must leave room for the diagonal entry even if it is zero. 3664 .
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3665 submatrix (same value is used for all local rows). 3666 - o_nnz - array containing the number of nonzeros in the various rows of the 3667 OFF-DIAGONAL portion of the local submatrix (possibly different for 3668 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3669 structure. The size of this array is equal to the number 3670 of local rows, i.e. 'm'. 3671 3672 If the *_nnz parameter is given then the *_nz parameter is ignored 3673 3674 The AIJ format (also called the Yale sparse matrix format or 3675 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3676 storage. The stored row and column indices begin with zero. 3677 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3678 3679 The parallel matrix is partitioned such that the first m0 rows belong to 3680 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3681 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3682 3683 The DIAGONAL portion of the local submatrix of a processor can be defined 3684 as the submatrix which is obtained by extracting the part corresponding to 3685 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3686 first row that belongs to the processor, r2 is the last row belonging to 3687 this processor, and c1-c2 is the range of indices of the local part of a 3688 vector suitable for applying the matrix to. This is an mxn matrix. In the 3689 common case of a square matrix, the row and column ranges are the same and 3690 the DIAGONAL part is also square. The remaining portion of the local 3691 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3692 3693 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3694 3695 You can call MatGetInfo() to get information on how effective the preallocation was; 3696 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3697 You can also run with the option -info and look for messages with the string 3698 malloc in them to see if additional memory allocation was needed. 3699 3700 Example usage: 3701 3702 Consider the following 8x8 matrix with 34 non-zero values, that is 3703 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3704 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3705 as follows: 3706 3707 .vb 3708 1 2 0 | 0 3 0 | 0 4 3709 Proc0 0 5 6 | 7 0 0 | 8 0 3710 9 0 10 | 11 0 0 | 12 0 3711 ------------------------------------- 3712 13 0 14 | 15 16 17 | 0 0 3713 Proc1 0 18 0 | 19 20 21 | 0 0 3714 0 0 0 | 22 23 0 | 24 0 3715 ------------------------------------- 3716 Proc2 25 26 27 | 0 0 28 | 29 0 3717 30 0 0 | 31 32 33 | 0 34 3718 .ve 3719 3720 This can be represented as a collection of submatrices as: 3721 3722 .vb 3723 A B C 3724 D E F 3725 G H I 3726 .ve 3727 3728 Where the submatrices A,B,C are owned by proc0, D,E,F are 3729 owned by proc1, G,H,I are owned by proc2. 3730 3731 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3732 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3733 The 'M','N' parameters are 8,8, and have the same values on all procs. 3734 3735 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3736 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3737 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
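   For instance, proc1 in this example could preallocate with the exact
   per-row counts worked out at the end of this example (a sketch; the
   array names are illustrative):
.vb
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve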
3738 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3739 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3740 matrix, and [DF] as another SeqAIJ matrix. 3741 3742 When d_nz, o_nz parameters are specified, d_nz storage elements are 3743 allocated for every row of the local diagonal submatrix, and o_nz 3744 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3745 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3746 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3747 In this case, the values of d_nz,o_nz are: 3748 .vb 3749 proc0 : d_nz = 2, o_nz = 2 3750 proc1 : d_nz = 3, o_nz = 2 3751 proc2 : d_nz = 1, o_nz = 4 3752 .ve 3753 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3754 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3755 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3756 34 values. 3757 3758 When d_nnz, o_nnz parameters are specified, the storage is specified 3759 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3760 In the above case the values for d_nnz,o_nnz are: 3761 .vb 3762 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3763 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3764 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3765 .ve 3766 Here the space allocated is the sum of all the above values, i.e. 34, and 3767 hence pre-allocation is perfect. 3768 3769 Level: intermediate 3770 3771 .keywords: matrix, aij, compressed row, sparse, parallel 3772 3773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3774 MPIAIJ, MatGetInfo() 3775 @*/ 3776 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3777 { 3778 PetscErrorCode ierr; 3779 3780 PetscFunctionBegin; 3781 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3782 PetscFunctionReturn(0); 3783 } 3784 3785 #undef __FUNCT__ 3786 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3787 /*@ 3788 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 3789 rows in standard CSR format. 3790 3791 Collective on MPI_Comm 3792 3793 Input Parameters: 3794 + comm - MPI communicator 3795 . m - number of local rows (Cannot be PETSC_DECIDE) 3796 . n - This value should be the same as the local size used in creating the 3797 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3798 calculated if N is given) For square matrices n is almost always m. 3799 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3800 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3801 . i - row indices 3802 . j - column indices 3803 - a - matrix values 3804 3805 Output Parameter: 3806 . mat - the matrix 3807 3808 Level: intermediate 3809 3810 Notes: 3811 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3812 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3813 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3814 3815 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
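   As a sketch using the example below, process 0 (which owns the first two
   rows) would call (the array names are illustrative):
.vb
   ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&mat);CHKERRQ(ierr);
.ve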
3816 3817 The format which is used for the sparse matrix input is equivalent to a 3818 row-major ordering, i.e., for the following matrix, the input data expected is 3819 as shown: 3820 3821 1 0 0 3822 2 0 3 P0 3823 ------- 3824 4 5 6 P1 3825 3826 Process0 [P0]: rows_owned=[0,1] 3827 i = {0,1,3} [size = nrow+1 = 2+1] 3828 j = {0,0,2} [size = nz = 3] 3829 v = {1,2,3} [size = nz = 3] 3830 3831 Process1 [P1]: rows_owned=[2] 3832 i = {0,3} [size = nrow+1 = 1+1] 3833 j = {0,1,2} [size = nz = 3] 3834 v = {4,5,6} [size = nz = 3] 3835 3836 .keywords: matrix, aij, compressed row, sparse, parallel 3837 3838 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3839 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3840 @*/ 3841 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3842 { 3843 PetscErrorCode ierr; 3844 3845 PetscFunctionBegin; 3846 if (i[0]) { 3847 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3848 } 3849 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3850 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3851 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3852 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3853 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3854 PetscFunctionReturn(0); 3855 } 3856 3857 #undef __FUNCT__ 3858 #define __FUNCT__ "MatCreateMPIAIJ" 3859 /*@C 3860 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3861 (the default parallel PETSc format). For good matrix assembly performance 3862 the user should preallocate the matrix storage by setting the parameters 3863 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3864 performance can be increased by more than a factor of 50. 3865 3866 Collective on MPI_Comm 3867 3868 Input Parameters: 3869 + comm - MPI communicator 3870 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3871 This value should be the same as the local size used in creating the 3872 y vector for the matrix-vector product y = Ax. 3873 . n - This value should be the same as the local size used in creating the 3874 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3875 calculated if N is given) For square matrices n is almost always m. 3876 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3877 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3878 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3879 (same value is used for all local rows) 3880 . d_nnz - array containing the number of nonzeros in the various rows of the 3881 DIAGONAL portion of the local submatrix (possibly different for each row) 3882 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3883 The size of this array is equal to the number of local rows, i.e. 'm'. 3884 You must leave room for the diagonal entry even if it is zero. 3885 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3886 submatrix (same value is used for all local rows).
3887 - o_nnz - array containing the number of nonzeros in the various rows of the 3888 OFF-DIAGONAL portion of the local submatrix (possibly different for 3889 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3890 structure. The size of this array is equal to the number 3891 of local rows, i.e. 'm'. 3892 3893 Output Parameter: 3894 . A - the matrix 3895 3896 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3897 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3898 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3899 3900 Notes: 3901 If the *_nnz parameter is given then the *_nz parameter is ignored 3902 3903 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3904 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3905 storage requirements for this matrix. 3906 3907 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3908 processor then it must be used on all processors that share the object for 3909 that argument. 3910 3911 The user MUST specify either the local or global matrix dimensions 3912 (possibly both). 3913 3914 The parallel matrix is partitioned across processors such that the 3915 first m0 rows belong to process 0, the next m1 rows belong to 3916 process 1, the next m2 rows belong to process 2 etc., where 3917 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 3918 values corresponding to an [m x N] submatrix. 3919 3920 The columns are logically partitioned with the n0 columns belonging 3921 to the 0th partition, the next n1 columns belonging to the next 3922 partition etc., where n0,n1,n2... are the input parameter 'n'. 3923 3924 The DIAGONAL portion of the local submatrix on any given processor 3925 is the submatrix corresponding to the rows and columns m,n 3926 of the given processor, i.e., the diagonal matrix on 3927 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 3928 etc. The remaining portion of the local submatrix [m x (N-n)] 3929 constitutes the OFF-DIAGONAL portion. The example below better 3930 illustrates this concept. 3931 3932 For a square global matrix we define each processor's diagonal portion 3933 to be its local rows and the corresponding columns (a square submatrix); 3934 each processor's off-diagonal portion encompasses the remainder of the 3935 local matrix (a rectangular submatrix). 3936 3937 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3938 3939 When calling this routine with a single process communicator, a matrix of 3940 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3941 type of communicator, use the construction mechanism: 3942 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3943 3944 By default, this format uses inodes (identical nodes) when possible. 3945 We search for consecutive rows with the same nonzero structure, thereby 3946 reusing matrix information to achieve increased efficiency. 3947 3948 Options Database Keys: 3949 + -mat_no_inode - Do not use inodes 3950 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3951 - -mat_aij_oneindex - Internally use indexing starting at 1 3952 rather than 0. Note that when calling MatSetValues(), 3953 the user still MUST index entries starting at 0! 3954 3955 3956 Example usage: 3957 3958 Consider the following 8x8 matrix with 34 non-zero values, that is 3959 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 3960 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3961 as follows: 3962 3963 .vb 3964 1 2 0 | 0 3 0 | 0 4 3965 Proc0 0 5 6 | 7 0 0 | 8 0 3966 9 0 10 | 11 0 0 | 12 0 3967 ------------------------------------- 3968 13 0 14 | 15 16 17 | 0 0 3969 Proc1 0 18 0 | 19 20 21 | 0 0 3970 0 0 0 | 22 23 0 | 24 0 3971 ------------------------------------- 3972 Proc2 25 26 27 | 0 0 28 | 29 0 3973 30 0 0 | 31 32 33 | 0 34 3974 .ve 3975 3976 This can be represented as a collection of submatrices as: 3977 3978 .vb 3979 A B C 3980 D E F 3981 G H I 3982 .ve 3983 3984 Where the submatrices A,B,C are owned by proc0, D,E,F are 3985 owned by proc1, G,H,I are owned by proc2. 3986 3987 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3988 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3989 The 'M','N' parameters are 8,8, and have the same values on all procs. 3990 3991 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3992 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3993 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3994 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3995 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3996 matrix, and [DF] as another SeqAIJ matrix. 3997 3998 When d_nz, o_nz parameters are specified, d_nz storage elements are 3999 allocated for every row of the local diagonal submatrix, and o_nz 4000 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4001 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4002 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4003 In this case, the values of d_nz,o_nz are: 4004 .vb 4005 proc0 : d_nz = 2, o_nz = 2 4006 proc1 : d_nz = 3, o_nz = 2 4007 proc2 : d_nz = 1, o_nz = 4 4008 .ve 4009 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4010 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4011 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4012 34 values. 4013 4014 When d_nnz, o_nnz parameters are specified, the storage is specified 4015 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4016 In the above case the values for d_nnz,o_nnz are: 4017 .vb 4018 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4019 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4020 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4021 .ve 4022 Here the space allocated is the sum of all the above values, i.e. 34, and 4023 hence pre-allocation is perfect.
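   In terms of the construction mechanism mentioned above, this example can
   be assembled as follows (a sketch; m,n and the d_nnz/o_nnz arrays are the
   per-process values listed above):
.vb
   MatCreate(comm,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A,m,n,8,8);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve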
4024 4025 Level: intermediate 4026 4027 .keywords: matrix, aij, compressed row, sparse, parallel 4028 4029 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4030 MPIAIJ, MatCreateMPIAIJWithArrays() 4031 @*/ 4032 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4033 { 4034 PetscErrorCode ierr; 4035 PetscMPIInt size; 4036 4037 PetscFunctionBegin; 4038 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4039 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4040 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4041 if (size > 1) { 4042 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4043 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4044 } else { 4045 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4046 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4047 } 4048 PetscFunctionReturn(0); 4049 } 4050 4051 #undef __FUNCT__ 4052 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4053 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4054 { 4055 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4056 4057 PetscFunctionBegin; 4058 *Ad = a->A; 4059 *Ao = a->B; 4060 *colmap = a->garray; 4061 PetscFunctionReturn(0); 4062 } 4063 4064 #undef __FUNCT__ 4065 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4066 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4067 { 4068 PetscErrorCode ierr; 4069 PetscInt i; 4070 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4071 4072 PetscFunctionBegin; 4073 if (coloring->ctype == IS_COLORING_GLOBAL) { 4074 ISColoringValue *allcolors,*colors; 4075 ISColoring ocoloring; 4076 4077 /* set coloring for diagonal portion */ 4078 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4079 4080 /* set coloring for off-diagonal portion */ 4081 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4082 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4083 for (i=0; i<a->B->cmap->n; i++) { 4084 colors[i] = allcolors[a->garray[i]]; 4085 } 4086 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4087 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4088 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4089 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4090 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4091 ISColoringValue *colors; 4092 PetscInt *larray; 4093 ISColoring ocoloring; 4094 4095 /* set coloring for diagonal portion */ 4096 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4097 for (i=0; i<a->A->cmap->n; i++) { 4098 larray[i] = i + A->cmap->rstart; 4099 } 4100 ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4101 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4102 for (i=0; i<a->A->cmap->n; i++) { 4103 colors[i] = coloring->colors[larray[i]]; 4104 } 4105 ierr = PetscFree(larray);CHKERRQ(ierr); 4106 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4107 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4108 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4109 4110 /* set coloring for off-diagonal portion */ 4111 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4112 ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4113 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4114 for (i=0; i<a->B->cmap->n; i++) { 4115 colors[i] = coloring->colors[larray[i]]; 4116 } 4117 ierr = PetscFree(larray);CHKERRQ(ierr); 4118 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4119 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4120 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4121 } else { 4122 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 4123 } 4124 4125 PetscFunctionReturn(0); 4126 } 4127 4128 #if defined(PETSC_HAVE_ADIC) 4129 #undef __FUNCT__ 4130 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4131 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4132 { 4133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4134 PetscErrorCode ierr; 4135 4136 PetscFunctionBegin; 4137 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4138 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4139 PetscFunctionReturn(0); 4140 } 4141 #endif 4142 4143 #undef __FUNCT__ 4144 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4145 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4146 { 4147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4148 PetscErrorCode ierr; 4149 4150 PetscFunctionBegin; 4151 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4152 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4153 PetscFunctionReturn(0); 4154 } 4155 4156 #undef __FUNCT__ 4157 #define __FUNCT__ "MatMerge" 4158 /*@ 4159 MatMerge - Creates a single large PETSc matrix by concatenating sequential 4160 matrices from each processor 4161 4162 Collective on MPI_Comm 4163 4164 Input Parameters: 4165 + comm - the communicator the parallel matrix will live on 4166 . inmat - the input sequential matrix (one per process) 4167 . n - number of local columns (or PETSC_DECIDE) 4168 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4169 4170 Output Parameter: 4171 . outmat - the parallel matrix generated 4172 4173 Level: advanced 4174 4175 Notes: The number of columns of the matrix in EACH processor MUST be the same.
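   A sketch of typical usage (the matrix names are illustrative):
.vb
   Mat seqA;   /* the SeqAIJ matrix each process contributes */
   Mat A;      /* the resulting parallel matrix */
   ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve
   Note also that this routine destroys the input matrix inmat (see the
   MatDestroy() call near the end of the implementation below), so the caller
   must not use or destroy inmat afterwards.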
4176 4177 @*/ 4178 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4179 { 4180 PetscErrorCode ierr; 4181 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4182 PetscInt *indx; 4183 PetscScalar *values; 4184 4185 PetscFunctionBegin; 4186 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4187 if (scall == MAT_INITIAL_MATRIX){ 4188 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4189 if (n == PETSC_DECIDE){ 4190 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4191 } 4192 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4193 rstart -= m; 4194 4195 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4196 for (i=0;i<m;i++) { 4197 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4198 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4199 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4200 } 4201 /* This routine will ONLY return MPIAIJ type matrix */ 4202 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4203 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4204 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4205 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4206 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4207 4208 } else if (scall == MAT_REUSE_MATRIX){ 4209 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4210 } else { 4211 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4212 } 4213 4214 for (i=0;i<m;i++) { 4215 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4216 Ii = i + rstart; 4217 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4218 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4219 } 4220 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4221 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4222 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4223 4224 PetscFunctionReturn(0); 4225 } 4226 4227 #undef __FUNCT__ 4228 #define __FUNCT__ "MatFileSplit" 4229 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4230 { 4231 PetscErrorCode ierr; 4232 PetscMPIInt rank; 4233 PetscInt m,N,i,rstart,nnz; 4234 size_t len; 4235 const PetscInt *indx; 4236 PetscViewer out; 4237 char *name; 4238 Mat B; 4239 const PetscScalar *values; 4240 4241 PetscFunctionBegin; 4242 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4243 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4244 /* Should this be the type of the diagonal block of A? 
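(The code below simply hardwires MATSEQAIJ, so a specialized AIJ subtype used for A's diagonal block would not be preserved.)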
*/ 4245 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4246 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4247 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4248 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4249 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4250 for (i=0;i<m;i++) { 4251 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4252 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4253 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4254 } 4255 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4256 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4257 4258 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4259 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4260 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4261 sprintf(name,"%s.%d",outfile,rank); 4262 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4263 ierr = PetscFree(name);CHKERRQ(ierr); 4264 ierr = MatView(B,out);CHKERRQ(ierr); 4265 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4266 ierr = MatDestroy(B);CHKERRQ(ierr); 4267 PetscFunctionReturn(0); 4268 } 4269 4270 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4271 #undef __FUNCT__ 4272 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4273 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4274 { 4275 PetscErrorCode ierr; 4276 Mat_Merge_SeqsToMPI *merge; 4277 PetscContainer container; 4278 4279 PetscFunctionBegin; 4280 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4281 if (container) { 4282 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4283 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4284 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4285 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4286 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4287 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4288 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4289 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4290 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4291 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4292 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4293 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4294 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4295 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4296 4297 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4298 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4299 ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container existed */ 4300 } 4301 4302 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4303 PetscFunctionReturn(0); 4304 } 4305 4306 #include "../src/mat/utils/freespace.h" 4307 #include "petscbt.h" 4308 4309 #undef __FUNCT__ 4310 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4311 /*@C 4312 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 4313 matrices from each processor 4314 4315 Collective on MPI_Comm 4316 4317 Input Parameters: 4318 + comm - the communicator the parallel matrix will live on 4319 . seqmat - the input sequential matrices 4320 . m - number of local rows (or PETSC_DECIDE) 4321 . n - number of local columns (or PETSC_DECIDE) 4322 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4323 4324 Output Parameter: 4325 . mpimat - the parallel matrix generated 4326 4327 Level: advanced 4328 4329 Notes: 4330 The dimensions of the sequential matrix in each processor MUST be the same.
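   As a sketch, the expected calling sequence pairs the symbolic routine
   defined below with this numeric routine (the names are illustrative):
.vb
   Mat mpimat;
   ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
   ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve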
4331 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4332 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4333 @*/ 4334 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4335 { 4336 PetscErrorCode ierr; 4337 MPI_Comm comm=((PetscObject)mpimat)->comm; 4338 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4339 PetscMPIInt size,rank,taga,*len_s; 4340 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4341 PetscInt proc,m; 4342 PetscInt **buf_ri,**buf_rj; 4343 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4344 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4345 MPI_Request *s_waits,*r_waits; 4346 MPI_Status *status; 4347 MatScalar *aa=a->a; 4348 MatScalar **abuf_r,*ba_i; 4349 Mat_Merge_SeqsToMPI *merge; 4350 PetscContainer container; 4351 4352 PetscFunctionBegin; 4353 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4354 4355 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4356 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4357 4358 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4359 if (container) { 4360 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4361 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat must have been created with MatMerge_SeqsToMPISymbolic()"); 4362 bi = merge->bi; 4363 bj = merge->bj; 4364 buf_ri = merge->buf_ri; 4365 buf_rj = merge->buf_rj; 4366 4367 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4368 owners = merge->rowmap->range; 4369 len_s = merge->len_s; 4370 4371 /* send and recv matrix values */ 4372 /*-----------------------------*/ 4373 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4374 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4375 4376 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4377 for (proc=0,k=0; proc<size; proc++){ 4378 if (!len_s[proc]) continue; 4379 i = owners[proc]; 4380 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4381 k++; 4382 } 4383 4384 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4385 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4386 ierr = PetscFree(status);CHKERRQ(ierr); 4387 4388 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4389 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4390 4391 /* insert mat values of mpimat */ 4392 /*----------------------------*/ 4393 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4394 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4395 4396 for (k=0; k<merge->nrecv; k++){ 4397 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4398 nrows = *(buf_ri_k[k]); 4399 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4400 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4401 } 4402 4403 /* set values of ba */ 4404 m = merge->rowmap->n; 4405 for (i=0; i<m; i++) { 4406 arow = owners[rank] + i; 4407 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4408 bnzi = bi[i+1] - bi[i]; 4409 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4410 4411 /* add local non-zero vals of this proc's seqmat into ba */ 4412 anzi = ai[arow+1] - ai[arow]; 4413 aj = a->j + ai[arow]; 4414 aa = a->a + ai[arow]; 4415 nextaj = 0; 4416 for (j=0; nextaj<anzi; j++){ 4417 if (*(bj_i
+ j) == aj[nextaj]){ /* bcol == acol */ 4418 ba_i[j] += aa[nextaj++]; 4419 } 4420 } 4421 4422 /* add received vals into ba */ 4423 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4424 /* i-th row */ 4425 if (i == *nextrow[k]) { 4426 anzi = *(nextai[k]+1) - *nextai[k]; 4427 aj = buf_rj[k] + *(nextai[k]); 4428 aa = abuf_r[k] + *(nextai[k]); 4429 nextaj = 0; 4430 for (j=0; nextaj<anzi; j++){ 4431 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4432 ba_i[j] += aa[nextaj++]; 4433 } 4434 } 4435 nextrow[k]++; nextai[k]++; 4436 } 4437 } 4438 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4439 } 4440 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4441 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4442 4443 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4444 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4445 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4446 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4447 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4448 PetscFunctionReturn(0); 4449 } 4450 4451 #undef __FUNCT__ 4452 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4453 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4454 { 4455 PetscErrorCode ierr; 4456 Mat B_mpi; 4457 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4458 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4459 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4460 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4461 PetscInt len,proc,*dnz,*onz; 4462 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4463 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4464 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4465 MPI_Status *status; 4466 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4467 PetscBT lnkbt; 4468 Mat_Merge_SeqsToMPI *merge; 4469 PetscContainer container; 4470 4471 PetscFunctionBegin; 4472 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4473 4474 /* make sure it is a PETSc comm */ 4475 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4476 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4477 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4478 4479 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4480 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4481 4482 /* determine row ownership */ 4483 /*---------------------------------------------------------*/ 4484 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4485 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4486 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4487 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4488 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4489 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4490 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4491 4492 m = merge->rowmap->n; 4493 M = merge->rowmap->N; 4494 owners = merge->rowmap->range; 4495 4496 /* determine the number of messages to send, their lengths */ 4497 /*---------------------------------------------------------*/ 4498 len_s = merge->len_s; 4499 4500 len = 0; /* length of buf_si[] */ 4501 merge->nsend = 0; 4502 for (proc=0; proc<size; proc++){ 4503 len_si[proc] = 0; 4504 if (proc == rank){ 4505 len_s[proc] = 0; 4506 } else { 4507 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4508 
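/* Worked example (hypothetical data, added for clarity): if [proc] owns global rows 5..7
   (owners[proc]=5, owners[proc+1]=8) and this process's seqmat has ai[5]=10, ai[6]=13,
   ai[7]=13, ai[8]=17, then len_s[proc] = ai[8]-ai[5] = 7 nonzeros are sent; only rows 5
   and 7 are nonempty, so nrows = 2 below and len_si[proc] is overwritten with
   2*(nrows+1) = 6, the number of integers in the i-structure message
   (row count, nonempty row indices, nrows+1 CSR-style offsets). */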
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */ 4509 } 4510 if (len_s[proc]) { 4511 merge->nsend++; 4512 nrows = 0; 4513 for (i=owners[proc]; i<owners[proc+1]; i++){ 4514 if (ai[i+1] > ai[i]) nrows++; 4515 } 4516 len_si[proc] = 2*(nrows+1); 4517 len += len_si[proc]; 4518 } 4519 } 4520 4521 /* determine the number and length of messages to receive for ij-structure */ 4522 /*-------------------------------------------------------------------------*/ 4523 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4524 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4525 4526 /* post the Irecv of j-structure */ 4527 /*-------------------------------*/ 4528 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4529 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4530 4531 /* post the Isend of j-structure */ 4532 /*--------------------------------*/ 4533 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4534 4535 for (proc=0, k=0; proc<size; proc++){ 4536 if (!len_s[proc]) continue; 4537 i = owners[proc]; 4538 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4539 k++; 4540 } 4541 4542 /* receives and sends of j-structure are complete */ 4543 /*------------------------------------------------*/ 4544 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4545 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4546 4547 /* send and recv i-structure */ 4548 /*---------------------------*/ 4549 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4550 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4551 4552 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4553 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4554 for (proc=0,k=0; proc<size; proc++){ 4555 if (!len_s[proc]) continue; 4556 /* form outgoing message for i-structure: 4557 buf_si[0]: number of nonempty rows to be sent 4558 [1:nrows]: local row index (relative to [proc]'s ownership range) 4559 [nrows+1:2*nrows+1]: CSR-style offsets into the j-structure message 4560 */ 4561 /*-------------------------------------------*/ 4562 nrows = len_si[proc]/2 - 1; 4563 buf_si_i = buf_si + nrows+1; 4564 buf_si[0] = nrows; 4565 buf_si_i[0] = 0; 4566 nrows = 0; 4567 for (i=owners[proc]; i<owners[proc+1]; i++){ 4568 anzi = ai[i+1] - ai[i]; 4569 if (anzi) { 4570 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4571 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4572 nrows++; 4573 } 4574 } 4575 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4576 k++; 4577 buf_si += len_si[proc]; 4578 } 4579 4580 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4581 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4582 4583 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4584 for (i=0; i<merge->nrecv; i++){ 4585 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4586 } 4587 4588 ierr = PetscFree(len_si);CHKERRQ(ierr); 4589 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4590 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4591 ierr =
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4592 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4593 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4594 ierr = PetscFree(status);CHKERRQ(ierr); 4595 4596 /* compute a local seq matrix on each process */ 4597 /*----------------------------------------------*/ 4598 /* allocate bi array and free space for accumulating nonzero column info */ 4599 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4600 bi[0] = 0; 4601 4602 /* create and initialize a linked list */ 4603 nlnk = N+1; 4604 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4605 4606 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4607 4608 len = ai[owners[rank+1]] - ai[owners[rank]]; 4609 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4610 current_space = free_space; 4611 4612 /* determine symbolic info for each local row */ 4613 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4614 4615 for (k=0; k<merge->nrecv; k++){ 4616 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4617 nrows = *buf_ri_k[k]; 4618 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4619 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (row offsets) of the k-th received message */ 4620 } 4621 4622 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4623 len = 0; 4624 for (i=0;i<m;i++) { 4625 bnzi = 0; 4626 /* add local non-zero cols of this proc's seqmat into lnk */ 4627 arow = owners[rank] + i; 4628 anzi = ai[arow+1] - ai[arow]; 4629 aj = a->j + ai[arow]; 4630 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4631 bnzi += nlnk; 4632 /* add received col data into lnk */ 4633 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4634 if (i == *nextrow[k]) { /* i-th row */ 4635 anzi = *(nextai[k]+1) - *nextai[k]; 4636 aj = buf_rj[k] + *nextai[k]; 4637 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4638 bnzi += nlnk; 4639 nextrow[k]++; nextai[k]++; 4640 } 4641 } 4642 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4643 4644 /* if free space is not available, make more free space */ 4645 if (current_space->local_remaining<bnzi) { 4646 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4647 nspacedouble++; 4648 } 4649 /* copy data into free space, then initialize lnk */ 4650 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4651 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4652 4653 current_space->array += bnzi; 4654 current_space->local_used += bnzi; 4655 current_space->local_remaining -= bnzi; 4656 4657 bi[i+1] = bi[i] + bnzi; 4658 } 4659 4660 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4661 4662 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4663 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4664 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4665 4666 /* create symbolic parallel matrix B_mpi */ 4667 /*---------------------------------------*/ 4668 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4669 if (n==PETSC_DECIDE) { 4670 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4671 } else { 4672 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4673 } 4674 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4675 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4676 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4677 4678 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4679 B_mpi->assembled = PETSC_FALSE; 4680 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4681 merge->bi = bi; 4682 merge->bj = bj; 4683 merge->buf_ri = buf_ri; 4684 merge->buf_rj = buf_rj; 4685 merge->coi = PETSC_NULL; 4686 merge->coj = PETSC_NULL; 4687 merge->owners_co = PETSC_NULL; 4688 4689 /* attach the supporting struct to B_mpi for reuse */ 4690 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4691 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4692 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4693 *mpimat = B_mpi; 4694 4695 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4696 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4697 PetscFunctionReturn(0); 4698 } 4699 4700 #undef __FUNCT__ 4701 #define __FUNCT__ "MatMerge_SeqsToMPI" 4702 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4703 { 4704 PetscErrorCode ierr; 4705 4706 PetscFunctionBegin; 4707 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4708 if (scall == MAT_INITIAL_MATRIX){ 4709 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4710 } 4711 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4712 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4713 PetscFunctionReturn(0); 4714 } 4715 4716 #undef __FUNCT__ 4717 #define __FUNCT__ "MatGetLocalMat" 4718 /*@ 4719 MatGetLocalMat - Creates a SeqAIJ matrix that contains all local rows of A 4720 4721 Not Collective 4722 4723 Input Parameters: 4724 + A - the matrix 4725 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4726 4727 Output Parameter: 4728 .
A_loc - the local sequential matrix generated 4729 4730 Level: developer 4731 4732 @*/ 4733 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4734 { 4735 PetscErrorCode ierr; 4736 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4737 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4738 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4739 MatScalar *aa=a->a,*ba=b->a,*cam; 4740 PetscScalar *ca; 4741 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4742 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4743 4744 PetscFunctionBegin; 4745 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4746 if (scall == MAT_INITIAL_MATRIX){ 4747 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4748 ci[0] = 0; 4749 for (i=0; i<am; i++){ 4750 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4751 } 4752 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4753 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4754 k = 0; 4755 for (i=0; i<am; i++) { 4756 ncols_o = bi[i+1] - bi[i]; 4757 ncols_d = ai[i+1] - ai[i]; 4758 /* off-diagonal portion of A: columns before the diagonal block */ 4759 for (jo=0; jo<ncols_o; jo++) { 4760 col = cmap[*bj]; 4761 if (col >= cstart) break; 4762 cj[k] = col; bj++; 4763 ca[k++] = *ba++; 4764 } 4765 /* diagonal portion of A */ 4766 for (j=0; j<ncols_d; j++) { 4767 cj[k] = cstart + *aj++; 4768 ca[k++] = *aa++; 4769 } 4770 /* off-diagonal portion of A: columns after the diagonal block */ 4771 for (j=jo; j<ncols_o; j++) { 4772 cj[k] = cmap[*bj++]; 4773 ca[k++] = *ba++; 4774 } 4775 } 4776 /* put together the new matrix */ 4777 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4778 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4779 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4780 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4781 mat->free_a = PETSC_TRUE; 4782 mat->free_ij = PETSC_TRUE; 4783 mat->nonew = 0; 4784 } else if (scall == MAT_REUSE_MATRIX){ 4785 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4786 ci = mat->i; cj = mat->j; cam = mat->a; 4787 for (i=0; i<am; i++) { 4788 /* off-diagonal portion of A: columns before the diagonal block */ 4789 ncols_o = bi[i+1] - bi[i]; 4790 for (jo=0; jo<ncols_o; jo++) { 4791 col = cmap[*bj]; 4792 if (col >= cstart) break; 4793 *cam++ = *ba++; bj++; 4794 } 4795 /* diagonal portion of A */ 4796 ncols_d = ai[i+1] - ai[i]; 4797 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4798 /* off-diagonal portion of A: columns after the diagonal block */ 4799 for (j=jo; j<ncols_o; j++) { 4800 *cam++ = *ba++; bj++; 4801 } 4802 } 4803 } else { 4804 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4805 } 4806 4807 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4808 PetscFunctionReturn(0); 4809 } 4810 4811 #undef __FUNCT__ 4812 #define __FUNCT__ "MatGetLocalMatCondensed" 4813 /*@C 4814 MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all local rows and the NON-ZERO columns of A 4815 4816 Not Collective 4817 4818 Input Parameters: 4819 + A - the matrix 4820 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4821 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4822 4823 Output Parameter: 4824 .
A_loc - the local sequential matrix generated 4825 4826 Level: developer 4827 4828 @*/ 4829 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4830 { 4831 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4832 PetscErrorCode ierr; 4833 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4834 IS isrowa,iscola; 4835 Mat *aloc; 4836 4837 PetscFunctionBegin; 4838 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4839 if (!row){ 4840 start = A->rmap->rstart; end = A->rmap->rend; 4841 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4842 } else { 4843 isrowa = *row; 4844 } 4845 if (!col){ 4846 start = A->cmap->rstart; 4847 cmap = a->garray; 4848 nzA = a->A->cmap->n; 4849 nzB = a->B->cmap->n; 4850 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4851 ncols = 0; 4852 for (i=0; i<nzB; i++) { 4853 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4854 else break; 4855 } 4856 imark = i; 4857 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4858 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4859 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4860 } else { 4861 iscola = *col; 4862 } 4863 if (scall != MAT_INITIAL_MATRIX){ 4864 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4865 aloc[0] = *A_loc; 4866 } 4867 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4868 *A_loc = aloc[0]; 4869 ierr = PetscFree(aloc);CHKERRQ(ierr); 4870 if (!row){ 4871 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4872 } 4873 if (!col){ 4874 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4875 } 4876 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4877 PetscFunctionReturn(0); 4878 } 4879 4880 #undef __FUNCT__ 4881 #define __FUNCT__ "MatGetBrowsOfAcols" 4882 /*@C 4883 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 4884 4885 Collective on Mat 4886 4887 Input Parameters: 4888 + A,B - the matrices in mpiaij format 4889 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4890 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4891 4892 Output Parameter: 4893 + rowb, colb - index sets of rows and columns of B to extract 4894 .
brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq are B's local rows) 4895 - B_seq - the sequential matrix generated 4896 4897 Level: developer 4898 4899 @*/ 4900 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4901 { 4902 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4903 PetscErrorCode ierr; 4904 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4905 IS isrowb,iscolb; 4906 Mat *bseq; 4907 4908 PetscFunctionBegin; 4909 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4910 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4911 } 4912 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4913 4914 if (scall == MAT_INITIAL_MATRIX){ 4915 start = A->cmap->rstart; 4916 cmap = a->garray; 4917 nzA = a->A->cmap->n; 4918 nzB = a->B->cmap->n; 4919 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4920 ncols = 0; 4921 for (i=0; i<nzB; i++) { /* global rows below the local ownership range */ 4922 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4923 else break; 4924 } 4925 imark = i; 4926 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* B's own local rows */ 4927 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows above the local ownership range */ 4928 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4929 *brstart = imark; 4930 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4931 } else { 4932 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4933 isrowb = *rowb; iscolb = *colb; 4934 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4935 bseq[0] = *B_seq; 4936 } 4937 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4938 *B_seq = bseq[0]; 4939 ierr = PetscFree(bseq);CHKERRQ(ierr); 4940 if (!rowb){ 4941 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4942 } else { 4943 *rowb = isrowb; 4944 } 4945 if (!colb){ 4946 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4947 } else { 4948 *colb = iscolb; 4949 } 4950 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4951 PetscFunctionReturn(0); 4952 } 4953 4954 #undef __FUNCT__ 4955 #define __FUNCT__ "MatGetBrowsOfAoCols" 4956 /*@C 4957 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4958 of the OFF-DIAGONAL portion of local A 4959 4960 Collective on Mat 4961 4962 Input Parameters: 4963 + A,B - the matrices in mpiaij format 4964 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4965 . startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4966 .
startsj_r - similar to startsj, for the receives 4967 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4968 4969 Output Parameter: 4970 . B_oth - the sequential matrix generated 4971 4972 Level: developer 4973 4974 @*/ 4975 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4976 { 4977 VecScatter_MPI_General *gen_to,*gen_from; 4978 PetscErrorCode ierr; 4979 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4980 Mat_SeqAIJ *b_oth; 4981 VecScatter ctx=a->Mvctx; 4982 MPI_Comm comm=((PetscObject)ctx)->comm; 4983 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4984 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4985 PetscScalar *rvalues,*svalues; 4986 MatScalar *b_otha,*bufa,*bufA; 4987 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4988 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4989 MPI_Status *sstatus,rstatus; 4990 PetscMPIInt jj; 4991 PetscInt *cols,sbs,rbs; 4992 PetscScalar *vals; 4993 4994 PetscFunctionBegin; 4995 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4996 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4997 } 4998 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4999 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5000 5001 gen_to = (VecScatter_MPI_General*)ctx->todata; 5002 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5003 rvalues = gen_from->values; /* reused to hold the lengths of incoming rows */ 5004 svalues = gen_to->values; /* reused to hold the lengths of outgoing rows */ 5005 nrecvs = gen_from->n; 5006 nsends = gen_to->n; 5007 5008 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5009 srow = gen_to->indices; /* local row index to be sent */ 5010 sstarts = gen_to->starts; 5011 sprocs = gen_to->procs; 5012 sstatus = gen_to->sstatus; 5013 sbs = gen_to->bs; 5014 rstarts = gen_from->starts; 5015 rprocs = gen_from->procs; 5016 rbs = gen_from->bs; 5017 5018 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5019 if (scall == MAT_INITIAL_MATRIX){ 5020 /* i-array */ 5021 /*---------*/ 5022 /* post receives */ 5023 for (i=0; i<nrecvs; i++){ 5024 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5025 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5026 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5027 } 5028 5029 /* pack the outgoing message */ 5030 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5031 sstartsj[0] = 0; rstartsj[0] = 0; 5032 len = 0; /* total length of j or a array to be sent */ 5033 k = 0; 5034 for (i=0; i<nsends; i++){ 5035 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5036 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5037 for (j=0; j<nrows; j++) { 5038 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5039 for (l=0; l<sbs; l++){ 5040 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5041 rowlen[j*sbs+l] = ncols; 5042 len += ncols; 5043 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5044 } 5045 k++; 5046 } 5047 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5048 sstartsj[i+1] = len; /* starting point of (i+1)-th
outgoing msg in bufj and bufa */ 5049 } 5050 /* recvs and sends of i-array are completed */ 5051 i = nrecvs; 5052 while (i--) { 5053 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5054 } 5055 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5056 5057 /* allocate buffers for sending j and a arrays */ 5058 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5059 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5060 5061 /* create i-array of B_oth */ 5062 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5063 b_othi[0] = 0; 5064 len = 0; /* total length of j or a array to be received */ 5065 k = 0; 5066 for (i=0; i<nrecvs; i++){ 5067 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5068 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5069 for (j=0; j<nrows; j++) { 5070 b_othi[k+1] = b_othi[k] + rowlen[j]; 5071 len += rowlen[j]; k++; 5072 } 5073 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5074 } 5075 5076 /* allocate space for j and a arrays of B_oth */ 5077 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5078 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5079 5080 /* j-array */ 5081 /*---------*/ 5082 /* post receives of j-array */ 5083 for (i=0; i<nrecvs; i++){ 5084 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5085 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5086 } 5087 5088 /* pack the outgoing message j-array */ 5089 k = 0; 5090 for (i=0; i<nsends; i++){ 5091 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5092 bufJ = bufj+sstartsj[i]; 5093 for (j=0; j<nrows; j++) { 5094 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5095 for (ll=0; ll<sbs; ll++){ 5096 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5097 for (l=0; l<ncols; l++){ 5098 *bufJ++ = cols[l]; 5099 } 5100 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5101 } 5102 } 5103 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5104 } 5105 5106 /* recvs and sends of j-array are completed */ 5107 i = nrecvs; 5108 while (i--) { 5109 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5110 } 5111 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5112 } else if (scall == MAT_REUSE_MATRIX){ 5113 sstartsj = *startsj; 5114 rstartsj = *startsj_r; 5115 bufa = *bufa_ptr; 5116 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5117 b_otha = b_oth->a; 5118 } else { 5119 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5120 } 5121 5122 /* a-array */ 5123 /*---------*/ 5124 /* post receives of a-array */ 5125 for (i=0; i<nrecvs; i++){ 5126 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5127 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5128 } 5129 5130 /* pack the outgoing message a-array */ 5131 k = 0; 5132 for (i=0; i<nsends; i++){ 5133 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5134 bufA = bufa+sstartsj[i]; 5135 for (j=0; j<nrows; j++) { 5136 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5137 for (ll=0; ll<sbs; ll++){ 5138 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5139 for (l=0; l<ncols; l++){ 5140 *bufA++ = vals[l]; 5141 } 5142 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5143 } 5144 } 5145 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5146 } 5147 /* recvs and sends of a-array are completed */ 5148 i = nrecvs; 5149 while (i--) { 5150 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5151 } 5152 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5153 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5154 5155 if (scall == MAT_INITIAL_MATRIX){ 5156 /* put together the new matrix */ 5157 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5158 5159 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5160 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5161 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5162 b_oth->free_a = PETSC_TRUE; 5163 b_oth->free_ij = PETSC_TRUE; 5164 b_oth->nonew = 0; 5165 5166 ierr = PetscFree(bufj);CHKERRQ(ierr); 5167 if (!startsj || !bufa_ptr){ 5168 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5169 ierr = PetscFree(bufa);CHKERRQ(ierr); 5170 } else { 5171 *startsj = sstartsj; 5172 *startsj_r = rstartsj; 5173 *bufa_ptr = bufa; 5174 } 5175 } 5176 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5177 PetscFunctionReturn(0); 5178 } 5179 5180 #undef __FUNCT__ 5181 #define __FUNCT__ "MatGetCommunicationStructs" 5182 /*@C 5183 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5184 5185 Not Collective 5186 5187 Input Parameter: 5188 . A - The matrix in mpiaij format 5189 5190 Output Parameters: 5191 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5192 .
colmap - A map from global column index to local index into lvec 5193 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5194 5195 Level: developer 5196 5197 @*/ 5198 #if defined (PETSC_USE_CTABLE) 5199 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5200 #else 5201 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5202 #endif 5203 { 5204 Mat_MPIAIJ *a; 5205 5206 PetscFunctionBegin; 5207 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5208 PetscValidPointer(lvec, 2); 5209 PetscValidPointer(colmap, 3); 5210 PetscValidPointer(multScatter, 4); 5211 a = (Mat_MPIAIJ *) A->data; 5212 if (lvec) *lvec = a->lvec; 5213 if (colmap) *colmap = a->colmap; 5214 if (multScatter) *multScatter = a->Mvctx; 5215 PetscFunctionReturn(0); 5216 } 5217 5218 EXTERN_C_BEGIN 5219 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5220 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5221 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5222 EXTERN_C_END 5223 5224 #undef __FUNCT__ 5225 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5226 /* 5227 Computes (B'*A')' since computing B*A directly is untenable 5228 5229 n p p 5230 ( ) ( ) ( ) 5231 m ( A ) * n ( B ) = m ( C ) 5232 ( ) ( ) ( ) 5233 5234 */ 5235 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5236 { 5237 PetscErrorCode ierr; 5238 Mat At,Bt,Ct; 5239 5240 PetscFunctionBegin; 5241 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5242 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5243 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5244 ierr = MatDestroy(At);CHKERRQ(ierr); 5245 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5246 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5247 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5248 PetscFunctionReturn(0); 5249 } 5250 5251 #undef __FUNCT__ 5252 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5253 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5254 { 5255 PetscErrorCode ierr; 5256 PetscInt m=A->rmap->n,n=B->cmap->n; 5257 Mat Cmat; 5258 5259 PetscFunctionBegin; 5260 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5261 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5262 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5263 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5264 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5265 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5266 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5267 *C = Cmat; 5268 PetscFunctionReturn(0); 5269 } 5270 5271 /* ----------------------------------------------------------------*/ 5272 #undef __FUNCT__ 5273 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5274 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5275 { 5276 PetscErrorCode ierr; 5277 5278 PetscFunctionBegin; 5279 if (scall == MAT_INITIAL_MATRIX){ 5280 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5281 } 5282 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5283 PetscFunctionReturn(0); 5284 } 
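/*
   Illustrative usage of the kernel above (a sketch, not part of the PETSc API docs;
   assumes A is MATMPIDENSE, B is MATMPIAIJ, and A->cmap->n == B->rmap->n):

      Mat C;
      ierr = MatMatMult_MPIDense_MPIAIJ(A,B,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
      ... update numerical values of A and/or B ...
      ierr = MatMatMult_MPIDense_MPIAIJ(A,B,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   In normal use this kernel is reached through MatMatMult(), which dispatches on the
   operand types via the functions composed in MatCreate_MPIAIJ() below; the fill
   argument is ignored here because C is dense.
*/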
5285 5286 EXTERN_C_BEGIN 5287 #if defined(PETSC_HAVE_MUMPS) 5288 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5289 #endif 5290 #if defined(PETSC_HAVE_PASTIX) 5291 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5292 #endif 5293 #if defined(PETSC_HAVE_SUPERLU_DIST) 5294 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5295 #endif 5296 #if defined(PETSC_HAVE_SPOOLES) 5297 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5298 #endif 5299 EXTERN_C_END 5300 5301 /*MC 5302 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5303 5304 Options Database Keys: 5305 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5306 5307 Level: beginner 5308 5309 .seealso: MatCreateMPIAIJ() 5310 M*/ 5311 5312 EXTERN_C_BEGIN 5313 #undef __FUNCT__ 5314 #define __FUNCT__ "MatCreate_MPIAIJ" 5315 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5316 { 5317 Mat_MPIAIJ *b; 5318 PetscErrorCode ierr; 5319 PetscMPIInt size; 5320 5321 PetscFunctionBegin; 5322 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5323 5324 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5325 B->data = (void*)b; 5326 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5327 B->rmap->bs = 1; 5328 B->assembled = PETSC_FALSE; 5329 5330 B->insertmode = NOT_SET_VALUES; 5331 b->size = size; 5332 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5333 5334 /* build cache for off array entries formed */ 5335 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5336 b->donotstash = PETSC_FALSE; 5337 b->colmap = 0; 5338 b->garray = 0; 5339 b->roworiented = PETSC_TRUE; 5340 5341 /* stuff used for matrix vector multiply */ 5342 b->lvec = PETSC_NULL; 5343 b->Mvctx = PETSC_NULL; 5344 5345 /* stuff for MatGetRow() */ 5346 b->rowindices = 0; 5347 b->rowvalues = 0; 5348 b->getrowactive = PETSC_FALSE; 5349 5350 #if defined(PETSC_HAVE_SPOOLES) 5351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5352 "MatGetFactor_mpiaij_spooles", 5353 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5354 #endif 5355 #if defined(PETSC_HAVE_MUMPS) 5356 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5357 "MatGetFactor_aij_mumps", 5358 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5359 #endif 5360 #if defined(PETSC_HAVE_PASTIX) 5361 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5362 "MatGetFactor_mpiaij_pastix", 5363 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5364 #endif 5365 #if defined(PETSC_HAVE_SUPERLU_DIST) 5366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5367 "MatGetFactor_mpiaij_superlu_dist", 5368 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5369 #endif 5370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5371 "MatStoreValues_MPIAIJ", 5372 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5374 "MatRetrieveValues_MPIAIJ", 5375 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5376 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5377 "MatGetDiagonalBlock_MPIAIJ", 5378 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5380 "MatIsTranspose_MPIAIJ", 5381 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5383 "MatMPIAIJSetPreallocation_MPIAIJ", 5384 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5386 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5387 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5389 "MatDiagonalScaleLocal_MPIAIJ", 5390 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5391 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5392 "MatConvert_MPIAIJ_MPIAIJPERM", 5393 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5394 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5395 "MatConvert_MPIAIJ_MPIAIJCRL", 5396 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5397 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5398 "MatConvert_MPIAIJ_MPISBAIJ", 5399 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5401 "MatMatMult_MPIDense_MPIAIJ", 5402 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5404 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5405 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5407 "MatMatMultNumeric_MPIDense_MPIAIJ", 5408 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5409 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5410 PetscFunctionReturn(0); 5411 } 5412 EXTERN_C_END 5413 5414 #undef __FUNCT__ 5415 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5416 /*@ 5417 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5418 and "off-diagonal" part of the matrix in CSR format. 5419 5420 Collective on MPI_Comm 5421 5422 Input Parameters: 5423 + comm - MPI communicator 5424 . m - number of local rows (cannot be PETSC_DECIDE) 5425 . n - This value should be the same as the local size used in creating the 5426 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 5427 calculated if N is given). For square matrices n is almost always m. 5428 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5429 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5430 . i - row indices for "diagonal" portion of matrix 5431 . j - column indices 5432 . a - matrix values 5433 . oi - row indices for "off-diagonal" portion of matrix 5434 . oj - column indices 5435 - oa - matrix values 5436 5437 Output Parameter: 5438 . mat - the matrix 5439 5440 Level: advanced 5441 5442 Notes: 5443 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5444 5445 The i and j indices are 0 based 5446 5447 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5448 5449 This sets local rows and cannot be used to set off-processor values. 5450 5451 You cannot later use MatSetValues() to change values in this matrix.
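   Example (an illustrative sketch with hypothetical data, added for clarity: two processes,
   each owning two rows of a 4x4 matrix; the arrays shown are those passed by rank 0, whose
   diagonal block covers global columns 0 and 1):

      PetscInt    i[3]  = {0,1,2}, j[2]  = {0,1};   /* diagonal part, local column indices */
      PetscScalar a[2]  = {1.0,2.0};
      PetscInt    oi[3] = {0,1,1}, oj[1] = {2};     /* off-diagonal part, global column indices */
      PetscScalar oa[1] = {5.0};
      ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   Since the arrays are not copied, they must remain valid for the lifetime of mat.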
5452 5453 .keywords: matrix, aij, compressed row, sparse, parallel 5454 5455 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5456 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5457 @*/ 5458 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5459 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5460 { 5461 PetscErrorCode ierr; 5462 Mat_MPIAIJ *maij; 5463 5464 PetscFunctionBegin; 5465 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5466 if (i[0]) { 5467 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5468 } 5469 if (oi[0]) { 5470 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5471 } 5472 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5473 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5474 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5475 maij = (Mat_MPIAIJ*) (*mat)->data; 5476 maij->donotstash = PETSC_TRUE; 5477 (*mat)->preallocated = PETSC_TRUE; 5478 5479 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5480 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5481 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5482 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5483 5484 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5486 5487 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5488 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5489 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5490 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5491 5492 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5493 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 PetscFunctionReturn(0); 5495 } 5496 5497 /* 5498 Special version for direct calls from Fortran 5499 */ 5500 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5501 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5502 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5503 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5504 #endif 5505 5506 /* Redefine these macros so they can be used in a void function */ 5507 #undef CHKERRQ 5508 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5509 #undef SETERRQ2 5510 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5511 #undef SETERRQ 5512 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5513 5514 EXTERN_C_BEGIN 5515 #undef __FUNCT__ 5516 #define __FUNCT__ "matsetvaluesmpiaij_" 5517 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5518 { 5519 Mat mat = *mmat; 5520 PetscInt m = *mm, n = *mn; 5521 InsertMode addv = *maddv; 5522 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5523 PetscScalar value; 5524 PetscErrorCode ierr; 5525 5526 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5527 if (mat->insertmode == NOT_SET_VALUES) { 5528 mat->insertmode = addv; 5529 } 5530 #if defined(PETSC_USE_DEBUG) 5531 else if (mat->insertmode != addv) { 5532 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5533 } 5534 #endif 5535 { 5536 PetscInt i,j,rstart =
mat->rmap->rstart,rend = mat->rmap->rend; 5537 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5538 PetscBool roworiented = aij->roworiented; 5539 5540 /* Some Variables required in the macro */ 5541 Mat A = aij->A; 5542 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5543 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5544 MatScalar *aa = a->a; 5545 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5546 Mat B = aij->B; 5547 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5548 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5549 MatScalar *ba = b->a; 5550 5551 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5552 PetscInt nonew = a->nonew; 5553 MatScalar *ap1,*ap2; 5554 5555 PetscFunctionBegin; 5556 for (i=0; i<m; i++) { 5557 if (im[i] < 0) continue; 5558 #if defined(PETSC_USE_DEBUG) 5559 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5560 #endif 5561 if (im[i] >= rstart && im[i] < rend) { 5562 row = im[i] - rstart; 5563 lastcol1 = -1; 5564 rp1 = aj + ai[row]; 5565 ap1 = aa + ai[row]; 5566 rmax1 = aimax[row]; 5567 nrow1 = ailen[row]; 5568 low1 = 0; 5569 high1 = nrow1; 5570 lastcol2 = -1; 5571 rp2 = bj + bi[row]; 5572 ap2 = ba + bi[row]; 5573 rmax2 = bimax[row]; 5574 nrow2 = bilen[row]; 5575 low2 = 0; 5576 high2 = nrow2; 5577 5578 for (j=0; j<n; j++) { 5579 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5580 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5581 if (in[j] >= cstart && in[j] < cend){ 5582 col = in[j] - cstart; 5583 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5584 } else if (in[j] < 0) continue; 5585 #if defined(PETSC_USE_DEBUG) 5586 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5587 #endif 5588 else { 5589 if (mat->was_assembled) { 5590 if (!aij->colmap) { 5591 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5592 } 5593 #if defined (PETSC_USE_CTABLE) 5594 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5595 col--; 5596 #else 5597 col = aij->colmap[in[j]] - 1; 5598 #endif 5599 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5600 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5601 col = in[j]; 5602 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5603 B = aij->B; 5604 b = (Mat_SeqAIJ*)B->data; 5605 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5606 rp2 = bj + bi[row]; 5607 ap2 = ba + bi[row]; 5608 rmax2 = bimax[row]; 5609 nrow2 = bilen[row]; 5610 low2 = 0; 5611 high2 = nrow2; 5612 bm = aij->B->rmap->n; 5613 ba = b->a; 5614 } 5615 } else col = in[j]; 5616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5617 } 5618 } 5619 } else { 5620 if (!aij->donotstash) { 5621 if (roworiented) { 5622 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5623 } else { 5624 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5625 } 5626 } 5627 } 5628 }} 5629 PetscFunctionReturnVoid(); 5630 } 5631 EXTERN_C_END 5632 5633
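/*
   For reference, an equivalent standard call from C (an illustrative sketch with
   hypothetical data; a 2x2 block of global row/column indices is inserted):

      PetscInt    rows[2] = {0,1},cols[2] = {0,1};
      PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
      ierr = MatSetValues(mat,2,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);

   matsetvaluesmpiaij_() implements the same insertion logic but is invoked directly
   from Fortran, bypassing the MatSetValues() function-table dispatch.
*/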