1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscBool aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global 
column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 207 { \ 208 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 209 lastcol1 = col;\ 210 while (high1-low1 > 5) { \ 211 t = (low1+high1)/2; \ 212 if (rp1[t] > col) high1 = t; \ 213 else low1 = t; \ 214 } \ 215 for (_i=low1; _i<high1; _i++) { \ 216 if (rp1[_i] > col) break; \ 217 if (rp1[_i] == col) { \ 218 if (addv == ADD_VALUES) ap1[_i] += value; \ 219 else ap1[_i] = value; \ 220 goto a_noinsert; \ 221 } \ 222 } \ 223 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 224 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 225 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 226 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 227 N = nrow1++ - 1; a->nz++; high1++; \ 228 /* shift up all the later entries in this row */ \ 229 for (ii=N; ii>=_i; ii--) { \ 230 rp1[ii+1] = rp1[ii]; \ 231 ap1[ii+1] = ap1[ii]; \ 232 } \ 233 rp1[_i] = col; \ 234 ap1[_i] = value; \ 235 a_noinsert: ; \ 236 ailen[row] = nrow1; \ 237 } 238 239 240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 241 { \ 242 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 243 lastcol2 = col;\ 244 while (high2-low2 > 5) { \ 245 t = (low2+high2)/2; \ 246 if (rp2[t] > col) high2 = t; \ 247 else low2 = t; \ 248 } \ 249 for (_i=low2; _i<high2; _i++) { \ 250 if (rp2[_i] > col) break; \ 251 if (rp2[_i] == col) { \ 252 if (addv == ADD_VALUES) ap2[_i] += value; \ 253 else ap2[_i] = value; \ 254 goto b_noinsert; \ 255 } \ 256 } \ 257 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 258 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 260 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 261 N = nrow2++ - 1; b->nz++; high2++; \ 262 /* shift up all the later entries in this row */ \ 263 for (ii=N; ii>=_i; ii--) { \ 264 rp2[ii+1] = rp2[ii]; \ 265 ap2[ii+1] = ap2[ii]; \ 266 } \ 267 rp2[_i] = col; \ 268 ap2[_i] = value; \ 269 b_noinsert: ; \ 270 bilen[row] = nrow2; \ 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 275 PetscErrorCode 
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscBool roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscBool ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash || mat->nooffprocentries) { 463 
PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 469 mat->insertmode = addv; /* in case this processor had no cache */ 470 471 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 472 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 473 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 480 { 481 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 482 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 483 PetscErrorCode ierr; 484 PetscMPIInt n; 485 PetscInt i,j,rstart,ncols,flg; 486 PetscInt *row,*col; 487 PetscBool other_disassembled; 488 PetscScalar *val; 489 InsertMode addv = mat->insertmode; 490 491 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 492 PetscFunctionBegin; 493 if (!aij->donotstash && !mat->nooffprocentries) { 494 while (1) { 495 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 496 if (!flg) break; 497 498 for (i=0; i<n;) { 499 /* Now identify the consecutive vals belonging to the same row */ 500 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 501 if (j < n) ncols = j-i; 502 else ncols = n-i; 503 /* Now assemble all these values with a single function call */ 504 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 505 i = j; 506 } 507 } 508 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 509 } 510 a->compressedrow.use = PETSC_FALSE; 511 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 512 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 513 514 /* determine if any processor has disassembled, if so we must 515 also disassemble ourselfs, in order that we may reassemble. 
*/ 516 /* 517 if nonzero structure of submatrix B cannot change then we know that 518 no processor disassembled thus we can skip this stuff 519 */ 520 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 521 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 522 if (mat->was_assembled && !other_disassembled) { 523 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 524 } 525 } 526 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 527 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 528 } 529 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 530 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 531 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 533 534 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 535 aij->rowvalues = 0; 536 537 /* used by MatAXPY() */ 538 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 539 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 540 541 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 542 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 const PetscScalar *xx; 575 PetscScalar *bb; 576 #if defined(PETSC_DEBUG) 577 PetscBool found = PETSC_FALSE; 578 #endif 579 580 PetscFunctionBegin; 581 /* first count number of contributors to each processor */ 582 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 583 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 584 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 585 j = 0; 586 for (i=0; i<N; i++) { 587 if (lastidx > (idx = rows[i])) j = 0; 588 lastidx = idx; 589 for (; j<size; j++) { 590 if (idx >= owners[j] && idx < owners[j+1]) { 591 nprocs[2*j]++; 592 nprocs[2*j+1] = 1; 593 owner[i] = j; 594 #if defined(PETSC_DEBUG) 595 found = PETSC_TRUE; 596 #endif 597 break; 598 } 599 } 600 #if defined(PETSC_DEBUG) 601 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 602 found = PETSC_FALSE; 603 #endif 604 } 605 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 606 607 if (A->nooffproczerorows) { 608 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 609 
nrecvs = nsends; 610 nmax = N; 611 } else { 612 /* inform other processors of number of messages and max length*/ 613 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 614 } 615 616 /* post receives: */ 617 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 618 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 619 for (i=0; i<nrecvs; i++) { 620 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 621 } 622 623 /* do sends: 624 1) starts[i] gives the starting index in svalues for stuff going to 625 the ith processor 626 */ 627 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 628 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 629 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 630 starts[0] = 0; 631 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 632 for (i=0; i<N; i++) { 633 svalues[starts[owner[i]]++] = rows[i]; 634 } 635 636 starts[0] = 0; 637 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 638 count = 0; 639 for (i=0; i<size; i++) { 640 if (nprocs[2*i+1]) { 641 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 642 } 643 } 644 ierr = PetscFree(starts);CHKERRQ(ierr); 645 646 base = owners[rank]; 647 648 /* wait on receives */ 649 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 650 count = nrecvs; slen = 0; 651 while (count) { 652 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 653 /* unpack receives into our local space */ 654 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 655 source[imdex] = recv_status.MPI_SOURCE; 656 lens[imdex] = n; 657 slen += n; 658 count--; 659 } 660 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 661 662 /* move the data into the send scatter */ 663 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 664 count = 0; 665 for (i=0; i<nrecvs; i++) { 666 values = rvalues + i*nmax; 667 for (j=0; j<lens[i]; j++) { 668 lrows[count++] = values[j] - base; 669 } 670 } 671 ierr = PetscFree(rvalues);CHKERRQ(ierr); 672 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 673 ierr = PetscFree(owner);CHKERRQ(ierr); 674 ierr = PetscFree(nprocs);CHKERRQ(ierr); 675 676 /* fix right hand side if needed */ 677 if (x && b) { 678 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 679 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 680 for (i=0; i<slen; i++) { 681 bb[lrows[i]] = diag*xx[lrows[i]]; 682 } 683 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 684 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 685 } 686 /* 687 Zero the required rows. If the "diagonal block" of the matrix 688 is square and the user wishes to set the diagonal we use separate 689 code so that MatSetValues() is not called for each diagonal allocating 690 new memory, thus calling lots of mallocs and slowing things down. 
691 692 */ 693 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 694 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 695 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 696 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 697 } else if (diag != 0.0) { 698 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 699 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 700 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 701 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 702 } 703 for (i = 0; i < slen; i++) { 704 row = lrows[i] + rstart; 705 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 706 } 707 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 } else { 710 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 711 } 712 ierr = PetscFree(lrows);CHKERRQ(ierr); 713 714 /* wait on sends */ 715 if (nsends) { 716 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 717 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 718 ierr = PetscFree(send_status);CHKERRQ(ierr); 719 } 720 ierr = PetscFree(send_waits);CHKERRQ(ierr); 721 ierr = PetscFree(svalues);CHKERRQ(ierr); 722 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 728 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 729 { 730 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 733 PetscInt i,*owners = A->rmap->range; 734 PetscInt *nprocs,j,idx,nsends; 735 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 736 PetscInt *rvalues,count,base,slen,*source; 737 PetscInt *lens,*lrows,*values,m; 738 MPI_Comm comm = ((PetscObject)A)->comm; 739 MPI_Request *send_waits,*recv_waits; 740 MPI_Status recv_status,*send_status; 741 const PetscScalar *xx; 742 PetscScalar *bb,*mask; 743 Vec xmask,lmask; 744 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 745 const PetscInt *aj, *ii,*ridx; 746 PetscScalar *aa; 747 #if defined(PETSC_DEBUG) 748 PetscBool found = PETSC_FALSE; 749 #endif 750 751 PetscFunctionBegin; 752 /* first count number of contributors to each processor */ 753 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 754 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 755 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 756 j = 0; 757 for (i=0; i<N; i++) { 758 if (lastidx > (idx = rows[i])) j = 0; 759 lastidx = idx; 760 for (; j<size; j++) { 761 if (idx >= owners[j] && idx < owners[j+1]) { 762 nprocs[2*j]++; 763 nprocs[2*j+1] = 1; 764 owner[i] = j; 765 #if defined(PETSC_DEBUG) 766 found = PETSC_TRUE; 767 #endif 768 break; 769 } 770 } 771 #if defined(PETSC_DEBUG) 772 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 773 found = PETSC_FALSE; 774 #endif 775 } 776 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 777 778 /* inform other processors of number of messages and max length*/ 779 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 780 781 /* post receives: */ 782 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 783 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 784 for (i=0; i<nrecvs; i++) { 785 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 786 } 787 788 /* do sends: 789 1) starts[i] gives the starting index in svalues for stuff going to 790 the ith processor 791 */ 792 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 793 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 794 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 795 starts[0] = 0; 796 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 797 for (i=0; i<N; i++) { 798 svalues[starts[owner[i]]++] = rows[i]; 799 } 800 801 starts[0] = 0; 802 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 803 count = 0; 804 for (i=0; i<size; i++) { 805 if (nprocs[2*i+1]) { 806 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 807 } 808 } 809 ierr = PetscFree(starts);CHKERRQ(ierr); 810 811 base = owners[rank]; 812 813 /* wait on receives */ 814 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 815 count = nrecvs; slen = 0; 816 while (count) { 817 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 818 /* unpack receives into our local space */ 819 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 820 source[imdex] = recv_status.MPI_SOURCE; 821 lens[imdex] = n; 822 slen += n; 823 count--; 824 } 825 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 826 827 /* move the data into the send scatter */ 828 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 829 count = 0; 830 for (i=0; i<nrecvs; i++) { 831 values = rvalues + i*nmax; 832 for (j=0; j<lens[i]; j++) { 833 lrows[count++] = values[j] - base; 834 } 835 } 836 ierr = PetscFree(rvalues);CHKERRQ(ierr); 837 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 838 ierr = PetscFree(owner);CHKERRQ(ierr); 839 ierr = PetscFree(nprocs);CHKERRQ(ierr); 840 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 841 842 /* zero diagonal part of matrix */ 843 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 844 845 /* handle off diagonal part of matrix */ 846 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 847 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 848 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 849 for (i=0; i<slen; i++) { 850 bb[lrows[i]] = 1; 851 } 852 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 853 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 854 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = VecDestroy(xmask);CHKERRQ(ierr); 856 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 858 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 859 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 860 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 861 862 /* remove zeroed rows of off diagonal matrix */ 863 ii = aij->i; 864 for (i=0; i<slen; i++) { 865 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 866 } 867 868 /* loop over all elements of off process part of matrix zeroing removed columns*/ 869 if (aij->compressedrow.use){ 870 m = aij->compressedrow.nrows; 871 ii = aij->compressedrow.i; 872 ridx = 
aij->compressedrow.rindex; 873 for (i=0; i<m; i++){ 874 n = ii[i+1] - ii[i]; 875 aj = aij->j + ii[i]; 876 aa = aij->a + ii[i]; 877 878 for (j=0; j<n; j++) { 879 if (mask[*aj]) { 880 bb[*ridx] -= *aa*xx[*aj]; 881 *aa = 0.0; 882 } 883 aa++; 884 aj++; 885 } 886 ridx++; 887 } 888 } else { /* do not use compressed row format */ 889 m = l->B->rmap->n; 890 for (i=0; i<m; i++) { 891 n = ii[i+1] - ii[i]; 892 aj = aij->j + ii[i]; 893 aa = aij->a + ii[i]; 894 for (j=0; j<n; j++) { 895 if (mask[*aj]) { 896 bb[i] -= *aa*xx[*aj]; 897 *aa = 0.0; 898 } 899 aa++; 900 aj++; 901 } 902 } 903 } 904 905 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 906 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 907 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 908 ierr = VecDestroy(lmask);CHKERRQ(ierr); 909 ierr = PetscFree(lrows);CHKERRQ(ierr); 910 911 /* wait on sends */ 912 if (nsends) { 913 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 914 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 915 ierr = PetscFree(send_status);CHKERRQ(ierr); 916 } 917 ierr = PetscFree(send_waits);CHKERRQ(ierr); 918 ierr = PetscFree(svalues);CHKERRQ(ierr); 919 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatMult_MPIAIJ" 925 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 926 { 927 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 928 PetscErrorCode ierr; 929 PetscInt nt; 930 931 PetscFunctionBegin; 932 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 933 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 934 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 936 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 937 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 943 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 944 { 945 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 946 PetscErrorCode ierr; 947 948 PetscFunctionBegin; 949 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatMultAdd_MPIAIJ" 955 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 956 { 957 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 958 PetscErrorCode ierr; 959 960 PetscFunctionBegin; 961 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 963 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 970 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 971 { 972 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscBool merged; 975 976 PetscFunctionBegin; 977 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 978 /* do nondiagonal part */ 979 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 980 if (!merged) { 981 /* send it on its way */ 982 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 983 /* do local part */ 984 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 985 
/* receive remote parts: note this assumes the values are not actually */ 986 /* added in yy until the next line, */ 987 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 988 } else { 989 /* do local part */ 990 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 991 /* send it on its way */ 992 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 993 /* values actually were received in the Begin() but we need to call this nop */ 994 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 995 } 996 PetscFunctionReturn(0); 997 } 998 999 EXTERN_C_BEGIN 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1002 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1003 { 1004 MPI_Comm comm; 1005 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1006 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1007 IS Me,Notme; 1008 PetscErrorCode ierr; 1009 PetscInt M,N,first,last,*notme,i; 1010 PetscMPIInt size; 1011 1012 PetscFunctionBegin; 1013 1014 /* Easy test: symmetric diagonal block */ 1015 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1016 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1017 if (!*f) PetscFunctionReturn(0); 1018 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1019 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1020 if (size == 1) PetscFunctionReturn(0); 1021 1022 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1023 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1024 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1025 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1026 for (i=0; i<first; i++) notme[i] = i; 1027 for (i=last; i<M; i++) notme[i-last+first] = i; 1028 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1029 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1030 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1031 Aoff = Aoffs[0]; 1032 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1033 Boff = Boffs[0]; 1034 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1035 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1036 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1037 ierr = ISDestroy(Me);CHKERRQ(ierr); 1038 ierr = ISDestroy(Notme);CHKERRQ(ierr); 1039 1040 PetscFunctionReturn(0); 1041 } 1042 EXTERN_C_END 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1046 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1047 { 1048 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1049 PetscErrorCode ierr; 1050 1051 PetscFunctionBegin; 1052 /* do nondiagonal part */ 1053 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1054 /* send it on its way */ 1055 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1056 /* do local part */ 1057 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1058 /* receive remote parts */ 1059 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 /* 1064 This only works correctly for square matrices where the subblock A->A is the 1065 diagonal block 1066 */ 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1069 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 
1070 { 1071 PetscErrorCode ierr; 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1073 1074 PetscFunctionBegin; 1075 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1076 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1077 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatScale_MPIAIJ" 1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1084 { 1085 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1090 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatDestroy_MPIAIJ" 1096 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1097 { 1098 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 #if defined(PETSC_USE_LOG) 1103 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1104 #endif 1105 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1106 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 1107 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 1108 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 1109 #if defined (PETSC_USE_CTABLE) 1110 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 1111 #else 1112 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1113 #endif 1114 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1115 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 1116 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 1117 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1118 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1119 ierr = PetscFree(aij);CHKERRQ(ierr); 1120 1121 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1135 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1136 { 1137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1138 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1139 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1140 PetscErrorCode ierr; 1141 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1142 int fd; 1143 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1144 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 
1145 PetscScalar *column_values; 1146 PetscInt message_count,flowcontrolcount; 1147 1148 PetscFunctionBegin; 1149 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1150 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1151 nz = A->nz + B->nz; 1152 if (!rank) { 1153 header[0] = MAT_FILE_CLASSID; 1154 header[1] = mat->rmap->N; 1155 header[2] = mat->cmap->N; 1156 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1157 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1158 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1159 /* get largest number of rows any processor has */ 1160 rlen = mat->rmap->n; 1161 range = mat->rmap->range; 1162 for (i=1; i<size; i++) { 1163 rlen = PetscMax(rlen,range[i+1] - range[i]); 1164 } 1165 } else { 1166 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1167 rlen = mat->rmap->n; 1168 } 1169 1170 /* load up the local row counts */ 1171 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1172 for (i=0; i<mat->rmap->n; i++) { 1173 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1174 } 1175 1176 /* store the row lengths to the file */ 1177 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1178 if (!rank) { 1179 MPI_Status status; 1180 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1181 for (i=1; i<size; i++) { 1182 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1183 rlen = range[i+1] - range[i]; 1184 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1188 } else { 1189 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1190 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1191 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1192 } 1193 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1194 1195 /* load up the local column indices */ 1196 nzmax = nz; /* )th processor needs space a largest processor needs */ 1197 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1198 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1199 cnt = 0; 1200 for (i=0; i<mat->rmap->n; i++) { 1201 for (j=B->i[i]; j<B->i[i+1]; j++) { 1202 if ( (col = garray[B->j[j]]) > cstart) break; 1203 column_indices[cnt++] = col; 1204 } 1205 for (k=A->i[i]; k<A->i[i+1]; k++) { 1206 column_indices[cnt++] = A->j[k] + cstart; 1207 } 1208 for (; j<B->i[i+1]; j++) { 1209 column_indices[cnt++] = garray[B->j[j]]; 1210 } 1211 } 1212 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1213 1214 /* store the column indices to the file */ 1215 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1216 if (!rank) { 1217 MPI_Status status; 1218 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1219 for (i=1; i<size; i++) { 1220 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1221 ierr = 
MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1222 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1223 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1224 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1225 } 1226 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1227 } else { 1228 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1229 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1230 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1231 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1232 } 1233 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1234 1235 /* load up the local column values */ 1236 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1237 cnt = 0; 1238 for (i=0; i<mat->rmap->n; i++) { 1239 for (j=B->i[i]; j<B->i[i+1]; j++) { 1240 if ( garray[B->j[j]] > cstart) break; 1241 column_values[cnt++] = B->a[j]; 1242 } 1243 for (k=A->i[i]; k<A->i[i+1]; k++) { 1244 column_values[cnt++] = A->a[k]; 1245 } 1246 for (; j<B->i[i+1]; j++) { 1247 column_values[cnt++] = B->a[j]; 1248 } 1249 } 1250 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1251 1252 /* store the column values to the file */ 1253 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1254 if (!rank) { 1255 MPI_Status status; 1256 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1257 for (i=1; i<size; i++) { 1258 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1259 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1260 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1261 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1262 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1263 } 1264 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1265 } else { 1266 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1267 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1268 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1269 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1270 } 1271 ierr = PetscFree(column_values);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1277 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1278 { 1279 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1280 PetscErrorCode ierr; 1281 PetscMPIInt rank = aij->rank,size = aij->size; 1282 PetscBool isdraw,iascii,isbinary; 1283 PetscViewer sviewer; 1284 PetscViewerFormat format; 1285 1286 PetscFunctionBegin; 1287 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1288 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1289 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1290 if (iascii) { 1291 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1292 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1293 MatInfo info; 1294 PetscBool inodes; 1295 1296 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1297 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1298 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1299 if (!inodes) { 1300 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1301 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1302 } else { 1303 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1304 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1305 } 1306 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1308 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1310 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1312 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1315 PetscInt inodecount,inodelimit,*inodes; 1316 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1317 if (inodes) { 1318 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1319 } else { 1320 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1321 } 1322 PetscFunctionReturn(0); 1323 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1324 PetscFunctionReturn(0); 1325 } 1326 } else if (isbinary) { 1327 if (size == 1) { 1328 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1329 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1330 } else { 1331 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1332 } 1333 PetscFunctionReturn(0); 1334 } else if (isdraw) { 1335 PetscDraw draw; 1336 PetscBool isnull; 1337 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1338 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1339 } 1340 1341 if (size == 1) { 1342 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1343 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1344 } else { 1345 /* assemble the entire matrix onto first processor. 
*/ 1346 Mat A; 1347 Mat_SeqAIJ *Aloc; 1348 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1349 MatScalar *a; 1350 1351 if (mat->rmap->N > 1024) { 1352 PetscBool flg = PETSC_FALSE; 1353 1354 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1355 if (!flg) { 1356 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1357 } 1358 } 1359 1360 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1361 if (!rank) { 1362 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1363 } else { 1364 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1365 } 1366 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1367 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1368 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1369 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1370 1371 /* copy over the A part */ 1372 Aloc = (Mat_SeqAIJ*)aij->A->data; 1373 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1374 row = mat->rmap->rstart; 1375 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1376 for (i=0; i<m; i++) { 1377 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1378 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1379 } 1380 aj = Aloc->j; 1381 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1382 1383 /* copy over the B part */ 1384 Aloc = (Mat_SeqAIJ*)aij->B->data; 1385 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1386 row = mat->rmap->rstart; 1387 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1388 ct = cols; 1389 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1390 for (i=0; i<m; i++) { 1391 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1392 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1393 } 1394 ierr = PetscFree(ct);CHKERRQ(ierr); 1395 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1396 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 /* 1398 Everyone has to call to draw the matrix since the graphics waits are 1399 synchronized across all processors that share the PetscDraw object 1400 */ 1401 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1402 if (!rank) { 1403 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1404 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1405 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1406 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1407 } 1408 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1409 ierr = MatDestroy(A);CHKERRQ(ierr); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatView_MPIAIJ" 1416 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1417 { 1418 PetscErrorCode ierr; 1419 PetscBool iascii,isdraw,issocket,isbinary; 1420 1421 PetscFunctionBegin; 1422 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1423 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1424 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1425 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1426 if (iascii || isdraw || isbinary || issocket) { 1427 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1428 } else { 1429 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatSOR_MPIAIJ" 1436 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1437 { 1438 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1439 PetscErrorCode ierr; 1440 Vec bb1 = 0; 1441 PetscBool hasop; 1442 1443 PetscFunctionBegin; 1444 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1445 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1446 } 1447 1448 if (flag == SOR_APPLY_UPPER) { 1449 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1450 PetscFunctionReturn(0); 1451 } 1452 1453 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1454 if (flag & SOR_ZERO_INITIAL_GUESS) { 1455 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1456 its--; 1457 } 1458 1459 while (its--) { 1460 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1462 1463 /* update rhs: bb1 = bb - B*x */ 1464 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1465 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1466 1467 /* local sweep */ 1468 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1469 } 1470 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1471 if (flag & SOR_ZERO_INITIAL_GUESS) { 1472 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1473 its--; 1474 } 1475 while (its--) { 1476 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1478 1479 /* update rhs: bb1 = bb - B*x */ 1480 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1481 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1482 1483 /* local sweep */ 1484 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1485 } 1486 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1487 if (flag & SOR_ZERO_INITIAL_GUESS) { 1488 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1489 its--; 1490 } 1491 while (its--) { 1492 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 1495 /* update rhs: bb1 = bb - B*x */ 1496 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1497 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1498 1499 /* local sweep */ 1500 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1501 } 1502 } else if (flag & SOR_EISENSTAT) { 1503 Vec xx1; 1504 1505 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1506 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1507 1508 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 if (!mat->diag) { 1511 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1512 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1513 } 1514 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1515 if (hasop) { 1516 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1517 } else { 1518 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1519 } 1520 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1521 1522 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1526 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1527 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1528 } else { 1529 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1530 } 1531 1532 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatPermute_MPIAIJ" 1538 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1539 { 1540 MPI_Comm comm,pcomm; 1541 PetscInt first,local_size,nrows; 1542 const PetscInt *rows; 1543 PetscMPIInt size; 1544 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1545 PetscErrorCode ierr; 1546 1547 PetscFunctionBegin; 1548 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1549 /* make a collective version of 'rowp' */ 1550 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1551 if (pcomm==comm) { 1552 crowp = rowp; 1553 } else { 1554 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1555 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1556 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1557 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1558 } 1559 /* collect the global row permutation and invert it */ 1560 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1561 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1562 if (pcomm!=comm) { 1563 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1564 } 1565 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1566 /* get the local target indices */ 1567 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1568 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1569 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1570 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1571 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1572 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1573 /* the column permutation is so much easier; 1574 make a local version of 'colp' and invert it */ 1575 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1576 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1577 if (size==1) { 1578 lcolp = colp; 1579 } else { 1580 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1581 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1582 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1583 } 1584 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1585 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1586 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1587 if (size>1) { 1588 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1589 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1590 } 1591 /* now we just get the submatrix */ 1592 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1593 /* clean up */ 1594 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1595 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1601 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1602 { 1603 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1604 Mat A = mat->A,B = mat->B; 1605 PetscErrorCode ierr; 1606 PetscReal isend[5],irecv[5]; 1607 1608 PetscFunctionBegin; 1609 info->block_size = 1.0; 1610 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1611 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1612 isend[3] = info->memory; isend[4] = info->mallocs; 1613 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1614 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1615 isend[3] += info->memory; isend[4] += info->mallocs; 1616 if (flag == MAT_LOCAL) { 1617 info->nz_used = isend[0]; 1618 info->nz_allocated = isend[1]; 1619 info->nz_unneeded = isend[2]; 1620 info->memory = isend[3]; 1621 info->mallocs = isend[4]; 1622 } else if (flag == MAT_GLOBAL_MAX) { 1623 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1624 info->nz_used = irecv[0]; 1625 info->nz_allocated = irecv[1]; 1626 info->nz_unneeded = irecv[2]; 1627 info->memory = irecv[3]; 1628 info->mallocs = irecv[4]; 1629 } else if (flag == MAT_GLOBAL_SUM) { 1630 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1631 info->nz_used = irecv[0]; 1632 info->nz_allocated = irecv[1]; 1633 info->nz_unneeded = irecv[2]; 1634 info->memory = irecv[3]; 1635 info->mallocs = irecv[4]; 1636 } 1637 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1638 info->fill_ratio_needed = 0; 1639 info->factor_mallocs = 0; 1640 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatSetOption_MPIAIJ" 1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1647 { 1648 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1649 PetscErrorCode ierr; 1650 1651 PetscFunctionBegin; 1652 switch (op) { 1653 case MAT_NEW_NONZERO_LOCATIONS: 1654 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1655 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1656 case MAT_KEEP_NONZERO_PATTERN: 1657 case MAT_NEW_NONZERO_LOCATION_ERR: 1658 case MAT_USE_INODES: 1659 case MAT_IGNORE_ZERO_ENTRIES: 1660 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1661 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1662 break; 1663 case MAT_ROW_ORIENTED: 1664 a->roworiented = flg; 1665 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1666 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1667 break; 1668 case MAT_NEW_DIAGONALS: 1669 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1670 break; 1671 case MAT_IGNORE_OFF_PROC_ENTRIES: 1672 a->donotstash = PETSC_TRUE; 1673 break; 1674 case MAT_SPD: 1675 A->spd_set = PETSC_TRUE; 1676 A->spd = flg; 1677 if (flg) { 1678 A->symmetric = PETSC_TRUE; 1679 A->structurally_symmetric = PETSC_TRUE; 1680 A->symmetric_set = PETSC_TRUE; 1681 A->structurally_symmetric_set = PETSC_TRUE; 1682 } 1683 break; 1684 case MAT_SYMMETRIC: 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 break; 1687 case 
MAT_STRUCTURALLY_SYMMETRIC: 1688 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1689 break; 1690 case MAT_HERMITIAN: 1691 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1692 break; 1693 case MAT_SYMMETRY_ETERNAL: 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 break; 1696 default: 1697 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1698 } 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatGetRow_MPIAIJ" 1704 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1705 { 1706 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1707 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1708 PetscErrorCode ierr; 1709 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1710 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1711 PetscInt *cmap,*idx_p; 1712 1713 PetscFunctionBegin; 1714 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1715 mat->getrowactive = PETSC_TRUE; 1716 1717 if (!mat->rowvalues && (idx || v)) { 1718 /* 1719 allocate enough space to hold information from the longest row. 1720 */ 1721 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1722 PetscInt max = 1,tmp; 1723 for (i=0; i<matin->rmap->n; i++) { 1724 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1725 if (max < tmp) { max = tmp; } 1726 } 1727 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1728 } 1729 1730 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1731 lrow = row - rstart; 1732 1733 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1734 if (!v) {pvA = 0; pvB = 0;} 1735 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1736 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1737 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1738 nztot = nzA + nzB; 1739 1740 cmap = mat->garray; 1741 if (v || idx) { 1742 if (nztot) { 1743 /* Sort by increasing column numbers, assuming A and B already sorted */ 1744 PetscInt imark = -1; 1745 if (v) { 1746 *v = v_p = mat->rowvalues; 1747 for (i=0; i<nzB; i++) { 1748 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1749 else break; 1750 } 1751 imark = i; 1752 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1753 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1754 } 1755 if (idx) { 1756 *idx = idx_p = mat->rowindices; 1757 if (imark > -1) { 1758 for (i=0; i<imark; i++) { 1759 idx_p[i] = cmap[cworkB[i]]; 1760 } 1761 } else { 1762 for (i=0; i<nzB; i++) { 1763 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1764 else break; 1765 } 1766 imark = i; 1767 } 1768 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1769 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1770 } 1771 } else { 1772 if (idx) *idx = 0; 1773 if (v) *v = 0; 1774 } 1775 } 1776 *nz = nztot; 1777 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1778 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1785 { 1786 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1787 1788 PetscFunctionBegin; 1789 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1790 aij->getrowactive = PETSC_FALSE; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatNorm_MPIAIJ" 1796 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1800 PetscErrorCode ierr; 1801 PetscInt i,j,cstart = mat->cmap->rstart; 1802 PetscReal sum = 0.0; 1803 MatScalar *v; 1804 1805 PetscFunctionBegin; 1806 if (aij->size == 1) { 1807 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1808 } else { 1809 if (type == NORM_FROBENIUS) { 1810 v = amat->a; 1811 for (i=0; i<amat->nz; i++) { 1812 #if defined(PETSC_USE_COMPLEX) 1813 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1814 #else 1815 sum += (*v)*(*v); v++; 1816 #endif 1817 } 1818 v = bmat->a; 1819 for (i=0; i<bmat->nz; i++) { 1820 #if defined(PETSC_USE_COMPLEX) 1821 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1822 #else 1823 sum += (*v)*(*v); v++; 1824 #endif 1825 } 1826 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1827 *norm = sqrt(*norm); 1828 } else if (type == NORM_1) { /* max column norm */ 1829 PetscReal *tmp,*tmp2; 1830 PetscInt *jj,*garray = aij->garray; 1831 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1832 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1833 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1834 *norm = 0.0; 1835 v = amat->a; jj = amat->j; 1836 for (j=0; j<amat->nz; j++) { 1837 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1838 } 1839 v = bmat->a; jj = bmat->j; 1840 for (j=0; j<bmat->nz; j++) { 1841 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1842 } 1843 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1844 for (j=0; j<mat->cmap->N; j++) { 1845 if (tmp2[j] > *norm) *norm = tmp2[j]; 1846 } 1847 ierr = PetscFree(tmp);CHKERRQ(ierr); 1848 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1849 } else if (type == NORM_INFINITY) { /* max row norm */ 1850 PetscReal ntemp = 0.0; 1851 for (j=0; j<aij->A->rmap->n; j++) { 1852 v = amat->a + amat->i[j]; 1853 sum = 0.0; 1854 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1855 sum += PetscAbsScalar(*v); v++; 1856 } 1857 v = bmat->a + bmat->i[j]; 1858 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1859 sum += PetscAbsScalar(*v); v++; 1860 } 1861 if (sum > ntemp) ntemp = sum; 1862 } 1863 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1864 } else { 1865 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1866 } 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatTranspose_MPIAIJ" 1873 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1874 { 1875 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1876 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1877 PetscErrorCode ierr; 1878 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1879 PetscInt cstart=A->cmap->rstart,ncol; 1880 Mat B; 1881 MatScalar *array; 1882 1883 PetscFunctionBegin; 1884 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1885 1886 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1887 ai = Aloc->i; aj = Aloc->j; 1888 bi = Bloc->i; bj = Bloc->j; 1889 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1890 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1891 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1892 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1893 for (i=0; i<ai[ma]; i++){ 1894 d_nnz[aj[i]] ++; 1895 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1896 } 1897 1898 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1899 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1900 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1901 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1902 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1903 } else { 1904 B = *matout; 1905 } 1906 1907 /* copy over the A part */ 1908 array = Aloc->a; 1909 row = A->rmap->rstart; 1910 for (i=0; i<ma; i++) { 1911 ncol = ai[i+1]-ai[i]; 1912 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1913 row++; array += ncol; aj += ncol; 1914 } 1915 aj = Aloc->j; 1916 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1917 1918 /* copy over the B part */ 1919 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1920 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1921 array = Bloc->a; 1922 row = A->rmap->rstart; 1923 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1924 cols_tmp = cols; 1925 for (i=0; i<mb; i++) { 1926 ncol = bi[i+1]-bi[i]; 1927 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1928 row++; array += ncol; cols_tmp += ncol; 1929 } 1930 ierr = PetscFree(cols);CHKERRQ(ierr); 1931 1932 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1934 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1935 *matout = B; 1936 } else { 1937 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1944 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1945 { 1946 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1947 Mat a = aij->A,b = aij->B; 1948 PetscErrorCode ierr; 1949 PetscInt s1,s2,s3; 1950 1951 PetscFunctionBegin; 1952 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1953 if (rr) { 1954 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1955 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1956 /* Overlap communication with computation. 
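The forward scatter of rr into aij->lvec is started here, the diagonal block is scaled while the messages are in flight, and the scatter is completed only when the off-diagonal block actually needs the values.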
*/ 1957 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 } 1959 if (ll) { 1960 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1961 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1962 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1963 } 1964 /* scale the diagonal block */ 1965 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1966 1967 if (rr) { 1968 /* Do a scatter end and then right scale the off-diagonal block */ 1969 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1971 } 1972 1973 PetscFunctionReturn(0); 1974 } 1975 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1978 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1979 { 1980 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1985 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1986 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1987 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1988 PetscFunctionReturn(0); 1989 } 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1993 { 1994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatEqual_MPIAIJ" 2004 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2005 { 2006 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2007 Mat a,b,c,d; 2008 PetscBool flg; 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 a = matA->A; b = matA->B; 2013 c = matB->A; d = matB->B; 2014 2015 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2016 if (flg) { 2017 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2018 } 2019 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatCopy_MPIAIJ" 2025 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2026 { 2027 PetscErrorCode ierr; 2028 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2029 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2030 2031 PetscFunctionBegin; 2032 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2033 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2034 /* because of the column compression in the off-processor part of the matrix a->B, 2035 the number of columns in a->B and b->B may be different, hence we cannot call 2036 the MatCopy() directly on the two parts. 
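(The local column indices of a->B and b->B refer to their own compressed column spaces through the respective garray mappings, so matrices with the same global nonzero pattern need not have matching local patterns.)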
If need be, we can provide a more 2037 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2038 then copying the submatrices */ 2039 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2040 } else { 2041 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2042 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2043 } 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2049 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2050 { 2051 PetscErrorCode ierr; 2052 2053 PetscFunctionBegin; 2054 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatAXPY_MPIAIJ" 2060 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2061 { 2062 PetscErrorCode ierr; 2063 PetscInt i; 2064 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2065 PetscBLASInt bnz,one=1; 2066 Mat_SeqAIJ *x,*y; 2067 2068 PetscFunctionBegin; 2069 if (str == SAME_NONZERO_PATTERN) { 2070 PetscScalar alpha = a; 2071 x = (Mat_SeqAIJ *)xx->A->data; 2072 y = (Mat_SeqAIJ *)yy->A->data; 2073 bnz = PetscBLASIntCast(x->nz); 2074 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2075 x = (Mat_SeqAIJ *)xx->B->data; 2076 y = (Mat_SeqAIJ *)yy->B->data; 2077 bnz = PetscBLASIntCast(x->nz); 2078 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2079 } else if (str == SUBSET_NONZERO_PATTERN) { 2080 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2081 2082 x = (Mat_SeqAIJ *)xx->B->data; 2083 y = (Mat_SeqAIJ *)yy->B->data; 2084 if (y->xtoy && y->XtoY != xx->B) { 2085 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2086 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2087 } 2088 if (!y->xtoy) { /* get xtoy */ 2089 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2090 y->XtoY = xx->B; 2091 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2092 } 2093 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2094 } else { 2095 Mat B; 2096 PetscInt *nnz_d,*nnz_o; 2097 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2098 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2099 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2100 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2101 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2102 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2103 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2104 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2105 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2106 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2107 ierr = MatHeaderReplace(Y,B); 2108 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2109 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2110 } 2111 PetscFunctionReturn(0); 2112 } 2113 2114 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 2115 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatConjugate_MPIAIJ" 2118 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 2119 { 2120 #if defined(PETSC_USE_COMPLEX) 2121 PetscErrorCode ierr; 2122 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2123 2124 PetscFunctionBegin; 2125 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2126 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2127 #else 2128 PetscFunctionBegin; 2129 
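  /* conjugation is a no-op for real scalars, so there is nothing to do here */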
#endif 2130 PetscFunctionReturn(0); 2131 } 2132 2133 #undef __FUNCT__ 2134 #define __FUNCT__ "MatRealPart_MPIAIJ" 2135 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2142 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2148 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 PetscErrorCode ierr; 2152 2153 PetscFunctionBegin; 2154 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2155 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 #ifdef PETSC_HAVE_PBGL 2160 2161 #include <boost/parallel/mpi/bsp_process_group.hpp> 2162 #include <boost/graph/distributed/ilu_default_graph.hpp> 2163 #include <boost/graph/distributed/ilu_0_block.hpp> 2164 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2165 #include <boost/graph/distributed/petsc/interface.hpp> 2166 #include <boost/multi_array.hpp> 2167 #include <boost/parallel/distributed_property_map.hpp> 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2171 /* 2172 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2173 */ 2174 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2175 { 2176 namespace petsc = boost::distributed::petsc; 2177 2178 namespace graph_dist = boost::graph::distributed; 2179 using boost::graph::distributed::ilu_default::process_group_type; 2180 using boost::graph::ilu_permuted; 2181 2182 PetscBool row_identity, col_identity; 2183 PetscContainer c; 2184 PetscInt m, n, M, N; 2185 PetscErrorCode ierr; 2186 2187 PetscFunctionBegin; 2188 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2189 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2190 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2191 if (!row_identity || !col_identity) { 2192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2193 } 2194 2195 process_group_type pg; 2196 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2197 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2198 lgraph_type& level_graph = *lgraph_p; 2199 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2200 2201 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2202 ilu_permuted(level_graph); 2203 2204 /* put together the new matrix */ 2205 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2206 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2207 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2208 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2209 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2210 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2211 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2212 2213 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2214 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2215 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2221 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat
B,Mat A, const MatFactorInfo *info) 2222 { 2223 PetscFunctionBegin; 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "MatSolve_MPIAIJ" 2229 /* 2230 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2231 */ 2232 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2233 { 2234 namespace graph_dist = boost::graph::distributed; 2235 2236 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2237 lgraph_type* lgraph_p; 2238 PetscContainer c; 2239 PetscErrorCode ierr; 2240 2241 PetscFunctionBegin; 2242 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2243 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2244 ierr = VecCopy(b, x);CHKERRQ(ierr); 2245 2246 PetscScalar* array_x; 2247 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2248 PetscInt sx; 2249 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2250 2251 PetscScalar* array_b; 2252 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2253 PetscInt sb; 2254 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2255 2256 lgraph_type& level_graph = *lgraph_p; 2257 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2258 2259 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2260 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2261 ref_x(array_x, boost::extents[num_vertices(graph)]); 2262 2263 typedef boost::iterator_property_map<array_ref_type::iterator, 2264 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2265 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2266 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2267 2268 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2269 2270 PetscFunctionReturn(0); 2271 } 2272 #endif 2273 2274 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2275 PetscInt nzlocal,nsends,nrecvs; 2276 PetscMPIInt *send_rank,*recv_rank; 2277 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2278 PetscScalar *sbuf_a,**rbuf_a; 2279 PetscErrorCode (*MatDestroy)(Mat); 2280 } Mat_Redundant; 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2284 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2285 { 2286 PetscErrorCode ierr; 2287 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2288 PetscInt i; 2289 2290 PetscFunctionBegin; 2291 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2292 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2293 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2294 for (i=0; i<redund->nrecvs; i++){ 2295 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2296 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2297 } 2298 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2299 ierr = PetscFree(redund);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatDestroy_MatRedundant" 2305 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2306 { 2307 PetscErrorCode ierr; 2308 PetscContainer container; 2309 Mat_Redundant *redund=PETSC_NULL; 2310 2311 PetscFunctionBegin; 2312 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2313 if (container) { 2314 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2315 } else { 2316 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2317 } 2318 A->ops->destroy = redund->MatDestroy; 2319 
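  /* put back the original destroy routine, detach the container from the matrix, then let that routine tear down the underlying MPIAIJ data; the container itself is destroyed last */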
ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2320 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2321 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2327 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2328 { 2329 PetscMPIInt rank,size; 2330 MPI_Comm comm=((PetscObject)mat)->comm; 2331 PetscErrorCode ierr; 2332 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2333 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2334 PetscInt *rowrange=mat->rmap->range; 2335 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2336 Mat A=aij->A,B=aij->B,C=*matredundant; 2337 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2338 PetscScalar *sbuf_a; 2339 PetscInt nzlocal=a->nz+b->nz; 2340 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2341 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2342 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2343 MatScalar *aworkA,*aworkB; 2344 PetscScalar *vals; 2345 PetscMPIInt tag1,tag2,tag3,imdex; 2346 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2347 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2348 MPI_Status recv_status,*send_status; 2349 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2350 PetscInt **rbuf_j=PETSC_NULL; 2351 PetscScalar **rbuf_a=PETSC_NULL; 2352 Mat_Redundant *redund=PETSC_NULL; 2353 PetscContainer container; 2354 2355 PetscFunctionBegin; 2356 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2357 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2358 2359 if (reuse == MAT_REUSE_MATRIX) { 2360 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2361 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2362 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2363 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2364 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2365 if (container) { 2366 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2367 } else { 2368 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2369 } 2370 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2371 2372 nsends = redund->nsends; 2373 nrecvs = redund->nrecvs; 2374 send_rank = redund->send_rank; 2375 recv_rank = redund->recv_rank; 2376 sbuf_nz = redund->sbuf_nz; 2377 rbuf_nz = redund->rbuf_nz; 2378 sbuf_j = redund->sbuf_j; 2379 sbuf_a = redund->sbuf_a; 2380 rbuf_j = redund->rbuf_j; 2381 rbuf_a = redund->rbuf_a; 2382 } 2383 2384 if (reuse == MAT_INITIAL_MATRIX){ 2385 PetscMPIInt subrank,subsize; 2386 PetscInt nleftover,np_subcomm; 2387 /* get the destination processors' id send_rank, nsends and nrecvs */ 2388 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2389 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2390 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2391 np_subcomm = size/nsubcomm; 2392 nleftover = size - nsubcomm*np_subcomm; 2393 nsends = 0; nrecvs = 0; 2394 for (i=0; i<size; i++){ /* i=rank*/ 2395 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2396 send_rank[nsends] = i; nsends++; 2397 recv_rank[nrecvs++] = i; 2398 } 2399 } 2400 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2401 i = size-nleftover-1; 2402 j = 0; 2403 while (j < nsubcomm - nleftover){ 2404 send_rank[nsends++] = i; 2405 i--; j++; 2406 } 2407 } 2408 2409 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2410 for (i=0; i<nleftover; i++){ 2411 recv_rank[nrecvs++] = size-nleftover+i; 2412 } 2413 } 2414 2415 /* allocate sbuf_j, sbuf_a */ 2416 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2417 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2418 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2419 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2420 2421 /* copy mat's local entries into the buffers */ 2422 if (reuse == MAT_INITIAL_MATRIX){ 2423 rownz_max = 0; 2424 rptr = sbuf_j; 2425 cols = sbuf_j + rend-rstart + 1; 2426 vals = sbuf_a; 2427 rptr[0] = 0; 2428 for (i=0; i<rend-rstart; i++){ 2429 row = i + rstart; 2430 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2431 ncols = nzA + nzB; 2432 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2433 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2434 /* load the column indices for this row into cols */ 2435 lwrite = 0; 2436 for (l=0; l<nzB; l++) { 2437 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2438 vals[lwrite] = aworkB[l]; 2439 cols[lwrite++] = ctmp; 2440 } 2441 } 2442 for (l=0; l<nzA; l++){ 2443 vals[lwrite] = aworkA[l]; 2444 cols[lwrite++] = cstart + cworkA[l]; 2445 } 2446 for (l=0; l<nzB; l++) { 2447 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2448 vals[lwrite] = aworkB[l]; 2449 cols[lwrite++] = ctmp; 2450 } 2451 } 2452 vals += ncols; 2453 cols += ncols; 2454 rptr[i+1] = rptr[i] + ncols; 2455 if (rownz_max < ncols) rownz_max = ncols; 2456 } 2457 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2458 } else { /* only copy matrix values into sbuf_a */ 2459 rptr = sbuf_j; 2460 vals = sbuf_a; 2461 rptr[0] = 0; 2462 for (i=0; i<rend-rstart; i++){ 2463 row = i + rstart; 2464 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2465 ncols = nzA + nzB; 2466 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2467 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2468 lwrite = 0; 2469 for (l=0; l<nzB; l++) { 2470 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2471 } 2472 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2473 for 
(l=0; l<nzB; l++) { 2474 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2475 } 2476 vals += ncols; 2477 rptr[i+1] = rptr[i] + ncols; 2478 } 2479 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2480 2481 /* send nzlocal to others, and recv other's nzlocal */ 2482 /*--------------------------------------------------*/ 2483 if (reuse == MAT_INITIAL_MATRIX){ 2484 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2485 s_waits2 = s_waits3 + nsends; 2486 s_waits1 = s_waits2 + nsends; 2487 r_waits1 = s_waits1 + nsends; 2488 r_waits2 = r_waits1 + nrecvs; 2489 r_waits3 = r_waits2 + nrecvs; 2490 } else { 2491 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2492 r_waits3 = s_waits3 + nsends; 2493 } 2494 2495 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2496 if (reuse == MAT_INITIAL_MATRIX){ 2497 /* get new tags to keep the communication clean */ 2498 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2499 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2500 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2501 2502 /* post receives of other's nzlocal */ 2503 for (i=0; i<nrecvs; i++){ 2504 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2505 } 2506 /* send nzlocal to others */ 2507 for (i=0; i<nsends; i++){ 2508 sbuf_nz[i] = nzlocal; 2509 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2510 } 2511 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2512 count = nrecvs; 2513 while (count) { 2514 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2515 recv_rank[imdex] = recv_status.MPI_SOURCE; 2516 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2517 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2518 2519 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2520 rbuf_nz[imdex] += i + 2; 2521 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2522 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2523 count--; 2524 } 2525 /* wait on sends of nzlocal */ 2526 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2527 /* send mat->i,j to others, and recv from other's */ 2528 /*------------------------------------------------*/ 2529 for (i=0; i<nsends; i++){ 2530 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2531 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2532 } 2533 /* wait on receives of mat->i,j */ 2534 /*------------------------------*/ 2535 count = nrecvs; 2536 while (count) { 2537 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2538 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2539 count--; 2540 } 2541 /* wait on sends of mat->i,j */ 2542 /*---------------------------*/ 2543 if (nsends) { 2544 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2545 } 2546 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2547 2548 /* post receives, send and receive mat->a */ 2549 
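  /* the numerical values are exchanged for both MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX; the row and column index information above is sent only in the initial pass */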
/*----------------------------------------*/ 2550 for (imdex=0; imdex<nrecvs; imdex++) { 2551 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2552 } 2553 for (i=0; i<nsends; i++){ 2554 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2555 } 2556 count = nrecvs; 2557 while (count) { 2558 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2559 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2560 count--; 2561 } 2562 if (nsends) { 2563 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2564 } 2565 2566 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2567 2568 /* create redundant matrix */ 2569 /*-------------------------*/ 2570 if (reuse == MAT_INITIAL_MATRIX){ 2571 /* compute rownz_max for preallocation */ 2572 for (imdex=0; imdex<nrecvs; imdex++){ 2573 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2574 rptr = rbuf_j[imdex]; 2575 for (i=0; i<j; i++){ 2576 ncols = rptr[i+1] - rptr[i]; 2577 if (rownz_max < ncols) rownz_max = ncols; 2578 } 2579 } 2580 2581 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2582 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2583 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2584 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2585 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2586 } else { 2587 C = *matredundant; 2588 } 2589 2590 /* insert local matrix entries */ 2591 rptr = sbuf_j; 2592 cols = sbuf_j + rend-rstart + 1; 2593 vals = sbuf_a; 2594 for (i=0; i<rend-rstart; i++){ 2595 row = i + rstart; 2596 ncols = rptr[i+1] - rptr[i]; 2597 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2598 vals += ncols; 2599 cols += ncols; 2600 } 2601 /* insert received matrix entries */ 2602 for (imdex=0; imdex<nrecvs; imdex++){ 2603 rstart = rowrange[recv_rank[imdex]]; 2604 rend = rowrange[recv_rank[imdex]+1]; 2605 rptr = rbuf_j[imdex]; 2606 cols = rbuf_j[imdex] + rend-rstart + 1; 2607 vals = rbuf_a[imdex]; 2608 for (i=0; i<rend-rstart; i++){ 2609 row = i + rstart; 2610 ncols = rptr[i+1] - rptr[i]; 2611 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2612 vals += ncols; 2613 cols += ncols; 2614 } 2615 } 2616 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2617 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2618 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2619 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2620 if (reuse == MAT_INITIAL_MATRIX){ 2621 PetscContainer container; 2622 *matredundant = C; 2623 /* create a supporting struct and attach it to C for reuse */ 2624 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2625 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2626 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2627 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2628 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2629 2630 redund->nzlocal = nzlocal; 2631 redund->nsends = nsends; 2632 redund->nrecvs = nrecvs; 2633 redund->send_rank = send_rank; 2634 redund->recv_rank = recv_rank; 
2635 redund->sbuf_nz = sbuf_nz; 2636 redund->rbuf_nz = rbuf_nz; 2637 redund->sbuf_j = sbuf_j; 2638 redund->sbuf_a = sbuf_a; 2639 redund->rbuf_j = rbuf_j; 2640 redund->rbuf_a = rbuf_a; 2641 2642 redund->MatDestroy = C->ops->destroy; 2643 C->ops->destroy = MatDestroy_MatRedundant; 2644 } 2645 PetscFunctionReturn(0); 2646 } 2647 2648 #undef __FUNCT__ 2649 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2650 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2651 { 2652 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2653 PetscErrorCode ierr; 2654 PetscInt i,*idxb = 0; 2655 PetscScalar *va,*vb; 2656 Vec vtmp; 2657 2658 PetscFunctionBegin; 2659 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2660 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2661 if (idx) { 2662 for (i=0; i<A->rmap->n; i++) { 2663 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2664 } 2665 } 2666 2667 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2668 if (idx) { 2669 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2670 } 2671 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2672 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2673 2674 for (i=0; i<A->rmap->n; i++){ 2675 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2676 va[i] = vb[i]; 2677 if (idx) idx[i] = a->garray[idxb[i]]; 2678 } 2679 } 2680 2681 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2682 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2683 if (idxb) { 2684 ierr = PetscFree(idxb);CHKERRQ(ierr); 2685 } 2686 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2692 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2693 { 2694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2695 PetscErrorCode ierr; 2696 PetscInt i,*idxb = 0; 2697 PetscScalar *va,*vb; 2698 Vec vtmp; 2699 2700 PetscFunctionBegin; 2701 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2702 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2703 if (idx) { 2704 for (i=0; i<A->cmap->n; i++) { 2705 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2706 } 2707 } 2708 2709 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2710 if (idx) { 2711 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2712 } 2713 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2714 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2715 2716 for (i=0; i<A->rmap->n; i++){ 2717 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2718 va[i] = vb[i]; 2719 if (idx) idx[i] = a->garray[idxb[i]]; 2720 } 2721 } 2722 2723 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2724 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2725 if (idxb) { 2726 ierr = PetscFree(idxb);CHKERRQ(ierr); 2727 } 2728 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2734 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2735 { 2736 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2737 PetscInt n = A->rmap->n; 2738 PetscInt cstart = A->cmap->rstart; 2739 PetscInt *cmap = mat->garray; 2740 PetscInt *diagIdx, *offdiagIdx; 2741 Vec diagV, offdiagV; 2742 PetscScalar *a, *diagA, *offdiagA; 2743 PetscInt r; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2748 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2749 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2750 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2751 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2752 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2753 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2754 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2755 for(r = 0; r < n; ++r) { 2756 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2757 a[r] = diagA[r]; 2758 idx[r] = cstart + diagIdx[r]; 2759 } else { 2760 a[r] = offdiagA[r]; 2761 idx[r] = cmap[offdiagIdx[r]]; 2762 } 2763 } 2764 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2765 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2766 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2767 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2768 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2769 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2775 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2776 { 2777 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2778 PetscInt n = A->rmap->n; 2779 PetscInt cstart = A->cmap->rstart; 2780 PetscInt *cmap = mat->garray; 2781 PetscInt *diagIdx, *offdiagIdx; 2782 Vec diagV, offdiagV; 2783 PetscScalar *a, *diagA, *offdiagA; 2784 PetscInt r; 2785 PetscErrorCode ierr; 2786 2787 PetscFunctionBegin; 2788 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2789 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2790 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2791 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2792 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2793 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2794 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2795 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2796 for(r = 0; r < n; ++r) { 2797 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2798 a[r] = diagA[r]; 2799 idx[r] = cstart + diagIdx[r]; 2800 } else { 2801 a[r] = offdiagA[r]; 2802 idx[r] = cmap[offdiagIdx[r]]; 2803 } 2804 } 2805 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2806 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2807 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2808 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2809 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2810 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2816 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2817 { 2818 PetscErrorCode ierr; 2819 Mat *dummy; 2820 2821 PetscFunctionBegin; 2822 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2823 *newmat = *dummy; 2824 ierr = PetscFree(dummy);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2829 /* -------------------------------------------------------------------*/ 2830 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2831 MatGetRow_MPIAIJ, 2832 MatRestoreRow_MPIAIJ, 2833 MatMult_MPIAIJ, 2834 /* 4*/ MatMultAdd_MPIAIJ, 2835 MatMultTranspose_MPIAIJ, 2836 MatMultTransposeAdd_MPIAIJ, 2837 #ifdef PETSC_HAVE_PBGL 2838 MatSolve_MPIAIJ, 2839 #else 2840 0, 2841 #endif 2842 0, 2843 0, 2844 /*10*/ 0, 2845 0, 2846 0, 2847 
MatSOR_MPIAIJ, 2848 MatTranspose_MPIAIJ, 2849 /*15*/ MatGetInfo_MPIAIJ, 2850 MatEqual_MPIAIJ, 2851 MatGetDiagonal_MPIAIJ, 2852 MatDiagonalScale_MPIAIJ, 2853 MatNorm_MPIAIJ, 2854 /*20*/ MatAssemblyBegin_MPIAIJ, 2855 MatAssemblyEnd_MPIAIJ, 2856 MatSetOption_MPIAIJ, 2857 MatZeroEntries_MPIAIJ, 2858 /*24*/ MatZeroRows_MPIAIJ, 2859 0, 2860 #ifdef PETSC_HAVE_PBGL 2861 0, 2862 #else 2863 0, 2864 #endif 2865 0, 2866 0, 2867 /*29*/ MatSetUpPreallocation_MPIAIJ, 2868 #ifdef PETSC_HAVE_PBGL 2869 0, 2870 #else 2871 0, 2872 #endif 2873 0, 2874 0, 2875 0, 2876 /*34*/ MatDuplicate_MPIAIJ, 2877 0, 2878 0, 2879 0, 2880 0, 2881 /*39*/ MatAXPY_MPIAIJ, 2882 MatGetSubMatrices_MPIAIJ, 2883 MatIncreaseOverlap_MPIAIJ, 2884 MatGetValues_MPIAIJ, 2885 MatCopy_MPIAIJ, 2886 /*44*/ MatGetRowMax_MPIAIJ, 2887 MatScale_MPIAIJ, 2888 0, 2889 0, 2890 MatZeroRowsColumns_MPIAIJ, 2891 /*49*/ MatSetBlockSize_MPIAIJ, 2892 0, 2893 0, 2894 0, 2895 0, 2896 /*54*/ MatFDColoringCreate_MPIAIJ, 2897 0, 2898 MatSetUnfactored_MPIAIJ, 2899 MatPermute_MPIAIJ, 2900 0, 2901 /*59*/ MatGetSubMatrix_MPIAIJ, 2902 MatDestroy_MPIAIJ, 2903 MatView_MPIAIJ, 2904 0, 2905 0, 2906 /*64*/ 0, 2907 0, 2908 0, 2909 0, 2910 0, 2911 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2912 MatGetRowMinAbs_MPIAIJ, 2913 0, 2914 MatSetColoring_MPIAIJ, 2915 #if defined(PETSC_HAVE_ADIC) 2916 MatSetValuesAdic_MPIAIJ, 2917 #else 2918 0, 2919 #endif 2920 MatSetValuesAdifor_MPIAIJ, 2921 /*75*/ MatFDColoringApply_AIJ, 2922 0, 2923 0, 2924 0, 2925 0, 2926 /*80*/ 0, 2927 0, 2928 0, 2929 /*83*/ MatLoad_MPIAIJ, 2930 0, 2931 0, 2932 0, 2933 0, 2934 0, 2935 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2936 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2937 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2938 MatPtAP_Basic, 2939 MatPtAPSymbolic_MPIAIJ, 2940 /*94*/ MatPtAPNumeric_MPIAIJ, 2941 0, 2942 0, 2943 0, 2944 0, 2945 /*99*/ 0, 2946 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2947 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2948 MatConjugate_MPIAIJ, 2949 0, 2950 /*104*/MatSetValuesRow_MPIAIJ, 2951 MatRealPart_MPIAIJ, 2952 MatImaginaryPart_MPIAIJ, 2953 0, 2954 0, 2955 /*109*/0, 2956 MatGetRedundantMatrix_MPIAIJ, 2957 MatGetRowMin_MPIAIJ, 2958 0, 2959 0, 2960 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2961 0, 2962 0, 2963 0, 2964 0, 2965 /*119*/0, 2966 0, 2967 0, 2968 0, 2969 MatGetMultiProcBlock_MPIAIJ 2970 }; 2971 2972 /* ----------------------------------------------------------------------------------------*/ 2973 2974 EXTERN_C_BEGIN 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2977 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2978 { 2979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2984 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2985 PetscFunctionReturn(0); 2986 } 2987 EXTERN_C_END 2988 2989 EXTERN_C_BEGIN 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2992 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2993 { 2994 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2995 PetscErrorCode ierr; 2996 2997 PetscFunctionBegin; 2998 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2999 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3000 PetscFunctionReturn(0); 3001 } 3002 EXTERN_C_END 3003 3004 EXTERN_C_BEGIN 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3007 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3008 { 3009 Mat_MPIAIJ *b; 3010 
PetscErrorCode ierr; 3011 PetscInt i; 3012 3013 PetscFunctionBegin; 3014 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3015 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3016 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3017 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3018 3019 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3020 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3021 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3022 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3023 if (d_nnz) { 3024 for (i=0; i<B->rmap->n; i++) { 3025 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3026 } 3027 } 3028 if (o_nnz) { 3029 for (i=0; i<B->rmap->n; i++) { 3030 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3031 } 3032 } 3033 b = (Mat_MPIAIJ*)B->data; 3034 3035 if (!B->preallocated) { 3036 /* Explicitly create 2 MATSEQAIJ matrices. */ 3037 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3038 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3039 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3040 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3041 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3042 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3043 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3044 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3045 } 3046 3047 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3048 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3049 B->preallocated = PETSC_TRUE; 3050 PetscFunctionReturn(0); 3051 } 3052 EXTERN_C_END 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3056 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3057 { 3058 Mat mat; 3059 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3060 PetscErrorCode ierr; 3061 3062 PetscFunctionBegin; 3063 *newmat = 0; 3064 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3065 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3066 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3067 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3068 a = (Mat_MPIAIJ*)mat->data; 3069 3070 mat->factortype = matin->factortype; 3071 mat->rmap->bs = matin->rmap->bs; 3072 mat->assembled = PETSC_TRUE; 3073 mat->insertmode = NOT_SET_VALUES; 3074 mat->preallocated = PETSC_TRUE; 3075 3076 a->size = oldmat->size; 3077 a->rank = oldmat->rank; 3078 a->donotstash = oldmat->donotstash; 3079 a->roworiented = oldmat->roworiented; 3080 a->rowindices = 0; 3081 a->rowvalues = 0; 3082 a->getrowactive = PETSC_FALSE; 3083 3084 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3085 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3086 3087 if (oldmat->colmap) { 3088 #if defined (PETSC_USE_CTABLE) 3089 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3090 #else 3091 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3092 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3093 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3094 #endif 3095 } else a->colmap = 0; 3096 if (oldmat->garray) { 3097 PetscInt len; 3098 len = oldmat->B->cmap->n; 3099 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3100 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3101 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3102 } else a->garray = 0; 3103 3104 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3105 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3106 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3107 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3108 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3109 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3110 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3111 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3112 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3113 *newmat = mat; 3114 PetscFunctionReturn(0); 3115 } 3116 3117 /* 3118 Allows sending/receiving larger messages then 2 gigabytes in a single call 3119 */ 3120 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 3121 { 3122 int ierr; 3123 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3124 PetscInt i,numchunks; 3125 PetscMPIInt icnt; 3126 3127 numchunks = cnt/CHUNKSIZE + 1; 3128 for (i=0; i<numchunks; i++) { 3129 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3130 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 3131 if (type == MPIU_INT) { 3132 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3133 } else if (type == MPIU_SCALAR) { 3134 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3135 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3136 } 3137 return 0; 3138 } 3139 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 3140 { 3141 int ierr; 3142 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3143 MPI_Status status; 3144 PetscInt i,numchunks; 3145 PetscMPIInt icnt; 3146 3147 numchunks = cnt/CHUNKSIZE + 1; 3148 for (i=0; i<numchunks; i++) { 3149 icnt = PetscMPIIntCast((i < numchunks-1) ? 
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3150 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 3151 if (type == MPIU_INT) { 3152 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3153 } else if (type == MPIU_SCALAR) { 3154 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3155 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3156 } 3157 return 0; 3158 } 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatLoad_MPIAIJ" 3162 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3163 { 3164 PetscScalar *vals,*svals; 3165 MPI_Comm comm = ((PetscObject)viewer)->comm; 3166 PetscErrorCode ierr; 3167 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3168 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3169 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3170 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3171 PetscInt cend,cstart,n,*rowners,sizesset=1; 3172 int fd; 3173 3174 PetscFunctionBegin; 3175 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3176 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3177 if (!rank) { 3178 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3179 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3180 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3181 } 3182 3183 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3184 3185 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3186 M = header[1]; N = header[2]; 3187 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3188 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3189 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3190 3191 /* If global sizes are set, check if they are consistent with that given in the file */ 3192 if (sizesset) { 3193 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3194 } 3195 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3196 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3197 3198 /* determine ownership of all rows */ 3199 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3200 else m = newMat->rmap->n; /* Set by user */ 3201 3202 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3203 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3204 3205 /* First process needs enough room for process with most rows */ 3206 if (!rank) { 3207 mmax = rowners[1]; 3208 for (i=2; i<size; i++) { 3209 mmax = PetscMax(mmax,rowners[i]); 3210 } 3211 } else mmax = m; 3212 3213 rowners[0] = 0; 3214 for (i=2; i<=size; i++) { 3215 rowners[i] += rowners[i-1]; 3216 } 3217 rstart = rowners[rank]; 3218 rend = rowners[rank+1]; 3219 3220 /* distribute row lengths to all processors */ 3221 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3222 if (!rank) { 3223 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3224 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3225 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3226 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3227 for (j=0; 
j<m; j++) { 3228 procsnz[0] += ourlens[j]; 3229 } 3230 for (i=1; i<size; i++) { 3231 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3232 /* calculate the number of nonzeros on each processor */ 3233 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3234 procsnz[i] += rowlengths[j]; 3235 } 3236 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3237 } 3238 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3239 } else { 3240 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3241 } 3242 3243 if (!rank) { 3244 /* determine max buffer needed and allocate it */ 3245 maxnz = 0; 3246 for (i=0; i<size; i++) { 3247 maxnz = PetscMax(maxnz,procsnz[i]); 3248 } 3249 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3250 3251 /* read in my part of the matrix column indices */ 3252 nz = procsnz[0]; 3253 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3254 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3255 3256 /* read in every one elses and ship off */ 3257 for (i=1; i<size; i++) { 3258 nz = procsnz[i]; 3259 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3260 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3261 } 3262 ierr = PetscFree(cols);CHKERRQ(ierr); 3263 } else { 3264 /* determine buffer space needed for message */ 3265 nz = 0; 3266 for (i=0; i<m; i++) { 3267 nz += ourlens[i]; 3268 } 3269 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3270 3271 /* receive message of column indices*/ 3272 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3273 } 3274 3275 /* determine column ownership if matrix is not square */ 3276 if (N != M) { 3277 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3278 else n = newMat->cmap->n; 3279 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3280 cstart = cend - n; 3281 } else { 3282 cstart = rstart; 3283 cend = rend; 3284 n = cend - cstart; 3285 } 3286 3287 /* loop over local rows, determining number of off diagonal entries */ 3288 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3289 jj = 0; 3290 for (i=0; i<m; i++) { 3291 for (j=0; j<ourlens[i]; j++) { 3292 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3293 jj++; 3294 } 3295 } 3296 3297 for (i=0; i<m; i++) { 3298 ourlens[i] -= offlens[i]; 3299 } 3300 if (!sizesset) { 3301 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3302 } 3303 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3304 3305 for (i=0; i<m; i++) { 3306 ourlens[i] += offlens[i]; 3307 } 3308 3309 if (!rank) { 3310 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3311 3312 /* read in my part of the matrix numerical values */ 3313 nz = procsnz[0]; 3314 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3315 3316 /* insert into matrix */ 3317 jj = rstart; 3318 smycols = mycols; 3319 svals = vals; 3320 for (i=0; i<m; i++) { 3321 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3322 smycols += ourlens[i]; 3323 svals += ourlens[i]; 3324 jj++; 3325 } 3326 3327 /* read in other processors and ship out */ 3328 for (i=1; i<size; i++) { 3329 nz = procsnz[i]; 3330 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3331 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3332 } 3333 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3334 } else { 3335 /* receive numeric values */ 3336 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3337 3338 /* receive message of values*/ 3339 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3340 3341 /* insert into matrix */ 3342 jj = rstart; 3343 smycols = mycols; 3344 svals = vals; 3345 for (i=0; i<m; i++) { 3346 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3347 smycols += ourlens[i]; 3348 svals += ourlens[i]; 3349 jj++; 3350 } 3351 } 3352 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3353 ierr = PetscFree(vals);CHKERRQ(ierr); 3354 ierr = PetscFree(mycols);CHKERRQ(ierr); 3355 ierr = PetscFree(rowners);CHKERRQ(ierr); 3356 3357 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3358 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3364 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3365 { 3366 PetscErrorCode ierr; 3367 IS iscol_local; 3368 PetscInt csize; 3369 3370 PetscFunctionBegin; 3371 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3372 if (call == MAT_REUSE_MATRIX) { 3373 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3374 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3375 } else { 3376 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3377 } 3378 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3379 if (call == MAT_INITIAL_MATRIX) { 3380 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3381 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3382 } 3383 PetscFunctionReturn(0); 3384 } 3385 3386 #undef __FUNCT__ 3387 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3388 /* 3389 Not great since it makes two copies of the submatrix, first an SeqAIJ 3390 in local and then by concatenating the local matrices the end result. 3391 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3392 3393 Note: This requires a sequential iscol with all indices. 
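    For orientation, a minimal sketch of the expected call pattern, mirroring what MatGetSubMatrix_MPIAIJ() above does (the variable names here are illustrative):

       IS       iscol_local;
       PetscInt csize;
       ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);    every process gathers ALL column indices
       ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
       ierr = ISDestroy(iscol_local);CHKERRQ(ierr);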
3394 */ 3395 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3396 { 3397 PetscErrorCode ierr; 3398 PetscMPIInt rank,size; 3399 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3400 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3401 Mat *local,M,Mreuse; 3402 MatScalar *vwork,*aa; 3403 MPI_Comm comm = ((PetscObject)mat)->comm; 3404 Mat_SeqAIJ *aij; 3405 3406 3407 PetscFunctionBegin; 3408 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3409 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3410 3411 if (call == MAT_REUSE_MATRIX) { 3412 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3413 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3414 local = &Mreuse; 3415 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3416 } else { 3417 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3418 Mreuse = *local; 3419 ierr = PetscFree(local);CHKERRQ(ierr); 3420 } 3421 3422 /* 3423 m - number of local rows 3424 n - number of columns (same on all processors) 3425 rstart - first row in new global matrix generated 3426 */ 3427 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3428 if (call == MAT_INITIAL_MATRIX) { 3429 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3430 ii = aij->i; 3431 jj = aij->j; 3432 3433 /* 3434 Determine the number of non-zeros in the diagonal and off-diagonal 3435 portions of the matrix in order to do correct preallocation 3436 */ 3437 3438 /* first get start and end of "diagonal" columns */ 3439 if (csize == PETSC_DECIDE) { 3440 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3441 if (mglobal == n) { /* square matrix */ 3442 nlocal = m; 3443 } else { 3444 nlocal = n/size + ((n % size) > rank); 3445 } 3446 } else { 3447 nlocal = csize; 3448 } 3449 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3450 rstart = rend - nlocal; 3451 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3452 3453 /* next, compute all the lengths */ 3454 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3455 olens = dlens + m; 3456 for (i=0; i<m; i++) { 3457 jend = ii[i+1] - ii[i]; 3458 olen = 0; 3459 dlen = 0; 3460 for (j=0; j<jend; j++) { 3461 if (*jj < rstart || *jj >= rend) olen++; 3462 else dlen++; 3463 jj++; 3464 } 3465 olens[i] = olen; 3466 dlens[i] = dlen; 3467 } 3468 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3469 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3470 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3471 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3472 ierr = PetscFree(dlens);CHKERRQ(ierr); 3473 } else { 3474 PetscInt ml,nl; 3475 3476 M = *newmat; 3477 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3478 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3479 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3480 /* 3481 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3482 rather than the slower MatSetValues(). 
3483 */ 3484 M->was_assembled = PETSC_TRUE; 3485 M->assembled = PETSC_FALSE; 3486 } 3487 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3488 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3489 ii = aij->i; 3490 jj = aij->j; 3491 aa = aij->a; 3492 for (i=0; i<m; i++) { 3493 row = rstart + i; 3494 nz = ii[i+1] - ii[i]; 3495 cwork = jj; jj += nz; 3496 vwork = aa; aa += nz; 3497 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3498 } 3499 3500 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3501 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3502 *newmat = M; 3503 3504 /* save submatrix used in processor for next request */ 3505 if (call == MAT_INITIAL_MATRIX) { 3506 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3507 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3508 } 3509 3510 PetscFunctionReturn(0); 3511 } 3512 3513 EXTERN_C_BEGIN 3514 #undef __FUNCT__ 3515 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3516 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3517 { 3518 PetscInt m,cstart, cend,j,nnz,i,d; 3519 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3520 const PetscInt *JJ; 3521 PetscScalar *values; 3522 PetscErrorCode ierr; 3523 3524 PetscFunctionBegin; 3525 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3526 3527 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3528 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3529 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3530 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3531 m = B->rmap->n; 3532 cstart = B->cmap->rstart; 3533 cend = B->cmap->rend; 3534 rstart = B->rmap->rstart; 3535 3536 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3537 3538 #if defined(PETSC_USE_DEBUG) 3539 for (i=0; i<m; i++) { 3540 nnz = Ii[i+1]- Ii[i]; 3541 JJ = J + Ii[i]; 3542 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3543 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3544 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3545 } 3546 #endif 3547 3548 for (i=0; i<m; i++) { 3549 nnz = Ii[i+1]- Ii[i]; 3550 JJ = J + Ii[i]; 3551 nnz_max = PetscMax(nnz_max,nnz); 3552 d = 0; 3553 for (j=0; j<nnz; j++) { 3554 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3555 } 3556 d_nnz[i] = d; 3557 o_nnz[i] = nnz - d; 3558 } 3559 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3560 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3561 3562 if (v) values = (PetscScalar*)v; 3563 else { 3564 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3565 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3566 } 3567 3568 for (i=0; i<m; i++) { 3569 ii = i + rstart; 3570 nnz = Ii[i+1]- Ii[i]; 3571 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3572 } 3573 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3574 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3575 3576 if (!v) { 3577 ierr = PetscFree(values);CHKERRQ(ierr); 3578 } 3579 PetscFunctionReturn(0); 3580 } 3581 EXTERN_C_END 3582 3583 #undef __FUNCT__ 3584 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3585 /*@ 3586 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3587 (the default parallel PETSc format). 3588 3589 Collective on MPI_Comm 3590 3591 Input Parameters: 3592 + B - the matrix 3593 . i - the indices into j for the start of each local row (starts with zero) 3594 . j - the column indices for each local row (starts with zero) 3595 - v - optional values in the matrix 3596 3597 Level: developer 3598 3599 Notes: 3600 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3601 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3602 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3603 3604 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3605 3606 The format which is used for the sparse matrix input, is equivalent to a 3607 row-major ordering.. i.e for the following matrix, the input data expected is 3608 as shown: 3609 3610 1 0 0 3611 2 0 3 P0 3612 ------- 3613 4 5 6 P1 3614 3615 Process0 [P0]: rows_owned=[0,1] 3616 i = {0,1,3} [size = nrow+1 = 2+1] 3617 j = {0,0,2} [size = nz = 6] 3618 v = {1,2,3} [size = nz = 6] 3619 3620 Process1 [P1]: rows_owned=[2] 3621 i = {0,3} [size = nrow+1 = 1+1] 3622 j = {0,1,2} [size = nz = 6] 3623 v = {4,5,6} [size = nz = 6] 3624 3625 .keywords: matrix, aij, compressed row, sparse, parallel 3626 3627 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3628 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3629 @*/ 3630 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3631 { 3632 PetscErrorCode ierr; 3633 3634 PetscFunctionBegin; 3635 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3636 PetscFunctionReturn(0); 3637 } 3638 3639 #undef __FUNCT__ 3640 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3641 /*@C 3642 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3643 (the default parallel PETSc format). For good matrix assembly performance 3644 the user should preallocate the matrix storage by setting the parameters 3645 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3646 performance can be increased by more than a factor of 50. 3647 3648 Collective on MPI_Comm 3649 3650 Input Parameters: 3651 + A - the matrix 3652 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3653 (same value is used for all local rows) 3654 . d_nnz - array containing the number of nonzeros in the various rows of the 3655 DIAGONAL portion of the local submatrix (possibly different for each row) 3656 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3657 The size of this array is equal to the number of local rows, i.e 'm'. 3658 You must leave room for the diagonal entry even if it is zero. 3659 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3660 submatrix (same value is used for all local rows). 3661 - o_nnz - array containing the number of nonzeros in the various rows of the 3662 OFF-DIAGONAL portion of the local submatrix (possibly different for 3663 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3664 structure. The size of this array is equal to the number 3665 of local rows, i.e 'm'. 3666 3667 If the *_nnz parameter is given then the *_nz parameter is ignored 3668 3669 The AIJ format (also called the Yale sparse matrix format or 3670 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3671 storage. The stored row and column indices begin with zero. 3672 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3673 3674 The parallel matrix is partitioned such that the first m0 rows belong to 3675 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3676 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3677 3678 The DIAGONAL portion of the local submatrix of a processor can be defined 3679 as the submatrix which is obtained by extraction the part corresponding to 3680 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3681 first row that belongs to the processor, r2 is the last row belonging to 3682 the this processor, and c1-c2 is range of indices of the local part of a 3683 vector suitable for applying the matrix to. This is an mxn matrix. In the 3684 common case of a square matrix, the row and column ranges are the same and 3685 the DIAGONAL part is also square. The remaining portion of the local 3686 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3687 3688 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3689 3690 You can call MatGetInfo() to get information on how effective the preallocation was; 3691 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3692 You can also run with the option -info and look for messages with the string 3693 malloc in them to see if additional memory allocation was needed. 3694 3695 Example usage: 3696 3697 Consider the following 8x8 matrix with 34 non-zero values, that is 3698 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3699 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3700 as follows: 3701 3702 .vb 3703 1 2 0 | 0 3 0 | 0 4 3704 Proc0 0 5 6 | 7 0 0 | 8 0 3705 9 0 10 | 11 0 0 | 12 0 3706 ------------------------------------- 3707 13 0 14 | 15 16 17 | 0 0 3708 Proc1 0 18 0 | 19 20 21 | 0 0 3709 0 0 0 | 22 23 0 | 24 0 3710 ------------------------------------- 3711 Proc2 25 26 27 | 0 0 28 | 29 0 3712 30 0 0 | 31 32 33 | 0 34 3713 .ve 3714 3715 This can be represented as a collection of submatrices as: 3716 3717 .vb 3718 A B C 3719 D E F 3720 G H I 3721 .ve 3722 3723 Where the submatrices A,B,C are owned by proc0, D,E,F are 3724 owned by proc1, G,H,I are owned by proc2. 3725 3726 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3727 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3728 The 'M','N' parameters are 8,8, and have the same values on all procs. 3729 3730 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3731 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3732 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
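   Once these two row-count patterns are known, a typical call sequence is (a minimal
   sketch; the communicator, sizes and nnz arrays are whatever the application provides):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve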
3733 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3734 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3735 matrix, and [DF] as another SeqAIJ matrix. 3736 3737 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3738 allocated for every row of the local diagonal submatrix, and o_nz 3739 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3740 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3741 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3742 In this case, the values of d_nz,o_nz are: 3743 .vb 3744 proc0 : d_nz = 2, o_nz = 2 3745 proc1 : d_nz = 3, o_nz = 2 3746 proc2 : d_nz = 1, o_nz = 4 3747 .ve 3748 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3749 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3750 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3751 34 values. 3752 3753 When the d_nnz, o_nnz parameters are specified, the storage is specified 3754 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3755 In the above case the values for d_nnz,o_nnz are: 3756 .vb 3757 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3758 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3759 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3760 .ve 3761 Here the space allocated is the sum of all the above values, i.e. 34, and 3762 hence the preallocation is exact. 3763 3764 Level: intermediate 3765 3766 .keywords: matrix, aij, compressed row, sparse, parallel 3767 3768 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3769 MPIAIJ, MatGetInfo() 3770 @*/ 3771 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3772 { 3773 PetscErrorCode ierr; 3774 3775 PetscFunctionBegin; 3776 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3777 PetscFunctionReturn(0); 3778 } 3779 3780 #undef __FUNCT__ 3781 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3782 /*@ 3783 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3784 in standard CSR format. 3785 3786 Collective on MPI_Comm 3787 3788 Input Parameters: 3789 + comm - MPI communicator 3790 . m - number of local rows (Cannot be PETSC_DECIDE) 3791 . n - This value should be the same as the local size used in creating the 3792 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it 3793 calculated if N is given) For square matrices n is almost always m. 3794 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3795 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3796 . i - row indices 3797 . j - column indices 3798 - a - matrix values 3799 3800 Output Parameter: 3801 . mat - the matrix 3802 3803 Level: intermediate 3804 3805 Notes: 3806 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3807 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3808 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3809 3810 The i and j indices are 0 based; the entries of i are offsets into the local j array.
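   For instance, once each process has built its local CSR arrays, a single call creates
   and fills the parallel matrix (a minimal sketch; m, n and the i, j, a arrays are whatever
   the caller owns locally, laid out as described next):
.vb
     MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve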
3811 3812 The format which is used for the sparse matrix input, is equivalent to a 3813 row-major ordering.. i.e for the following matrix, the input data expected is 3814 as shown: 3815 3816 1 0 0 3817 2 0 3 P0 3818 ------- 3819 4 5 6 P1 3820 3821 Process0 [P0]: rows_owned=[0,1] 3822 i = {0,1,3} [size = nrow+1 = 2+1] 3823 j = {0,0,2} [size = nz = 6] 3824 v = {1,2,3} [size = nz = 6] 3825 3826 Process1 [P1]: rows_owned=[2] 3827 i = {0,3} [size = nrow+1 = 1+1] 3828 j = {0,1,2} [size = nz = 6] 3829 v = {4,5,6} [size = nz = 6] 3830 3831 .keywords: matrix, aij, compressed row, sparse, parallel 3832 3833 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3834 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3835 @*/ 3836 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3837 { 3838 PetscErrorCode ierr; 3839 3840 PetscFunctionBegin; 3841 if (i[0]) { 3842 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3843 } 3844 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3845 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3846 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3847 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3848 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3849 PetscFunctionReturn(0); 3850 } 3851 3852 #undef __FUNCT__ 3853 #define __FUNCT__ "MatCreateMPIAIJ" 3854 /*@C 3855 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3856 (the default parallel PETSc format). For good matrix assembly performance 3857 the user should preallocate the matrix storage by setting the parameters 3858 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3859 performance can be increased by more than a factor of 50. 3860 3861 Collective on MPI_Comm 3862 3863 Input Parameters: 3864 + comm - MPI communicator 3865 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3866 This value should be the same as the local size used in creating the 3867 y vector for the matrix-vector product y = Ax. 3868 . n - This value should be the same as the local size used in creating the 3869 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3870 calculated if N is given) For square matrices n is almost always m. 3871 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3872 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3873 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3874 (same value is used for all local rows) 3875 . d_nnz - array containing the number of nonzeros in the various rows of the 3876 DIAGONAL portion of the local submatrix (possibly different for each row) 3877 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3878 The size of this array is equal to the number of local rows, i.e 'm'. 3879 You must leave room for the diagonal entry even if it is zero. 3880 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3881 submatrix (same value is used for all local rows). 
3882 - o_nnz - array containing the number of nonzeros in the various rows of the 3883 OFF-DIAGONAL portion of the local submatrix (possibly different for 3884 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3885 structure. The size of this array is equal to the number 3886 of local rows, i.e 'm'. 3887 3888 Output Parameter: 3889 . A - the matrix 3890 3891 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3892 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3893 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3894 3895 Notes: 3896 If the *_nnz parameter is given then the *_nz parameter is ignored 3897 3898 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3899 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3900 storage requirements for this matrix. 3901 3902 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3903 processor than it must be used on all processors that share the object for 3904 that argument. 3905 3906 The user MUST specify either the local or global matrix dimensions 3907 (possibly both). 3908 3909 The parallel matrix is partitioned across processors such that the 3910 first m0 rows belong to process 0, the next m1 rows belong to 3911 process 1, the next m2 rows belong to process 2 etc.. where 3912 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3913 values corresponding to [m x N] submatrix. 3914 3915 The columns are logically partitioned with the n0 columns belonging 3916 to 0th partition, the next n1 columns belonging to the next 3917 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3918 3919 The DIAGONAL portion of the local submatrix on any given processor 3920 is the submatrix corresponding to the rows and columns m,n 3921 corresponding to the given processor. i.e diagonal matrix on 3922 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3923 etc. The remaining portion of the local submatrix [m x (N-n)] 3924 constitute the OFF-DIAGONAL portion. The example below better 3925 illustrates this concept. 3926 3927 For a square global matrix we define each processor's diagonal portion 3928 to be its local rows and the corresponding columns (a square submatrix); 3929 each processor's off-diagonal portion encompasses the remainder of the 3930 local matrix (a rectangular submatrix). 3931 3932 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3933 3934 When calling this routine with a single process communicator, a matrix of 3935 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3936 type of communicator, use the construction mechanism: 3937 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3938 3939 By default, this format uses inodes (identical nodes) when possible. 3940 We search for consecutive rows with the same nonzero structure, thereby 3941 reusing matrix information to achieve increased efficiency. 3942 3943 Options Database Keys: 3944 + -mat_no_inode - Do not use inodes 3945 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3946 - -mat_aij_oneindex - Internally use indexing starting at 1 3947 rather than 0. Note that when calling MatSetValues(), 3948 the user still MUST index entries starting at 0! 3949 3950 3951 Example usage: 3952 3953 Consider the following 8x8 matrix with 34 non-zero values, that is 3954 assembled across 3 processors. 
Let us assume that proc0 owns 3 rows, 3955 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 3956 as follows: 3957 3958 .vb 3959 1 2 0 | 0 3 0 | 0 4 3960 Proc0 0 5 6 | 7 0 0 | 8 0 3961 9 0 10 | 11 0 0 | 12 0 3962 ------------------------------------- 3963 13 0 14 | 15 16 17 | 0 0 3964 Proc1 0 18 0 | 19 20 21 | 0 0 3965 0 0 0 | 22 23 0 | 24 0 3966 ------------------------------------- 3967 Proc2 25 26 27 | 0 0 28 | 29 0 3968 30 0 0 | 31 32 33 | 0 34 3969 .ve 3970 3971 This can be represented as a collection of submatrices as: 3972 3973 .vb 3974 A B C 3975 D E F 3976 G H I 3977 .ve 3978 3979 Where the submatrices A,B,C are owned by proc0, D,E,F are 3980 owned by proc1, G,H,I are owned by proc2. 3981 3982 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3983 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3984 The 'M','N' parameters are 8,8, and have the same values on all procs. 3985 3986 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3987 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3988 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3989 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3990 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3991 matrix, and [DF] as another SeqAIJ matrix. 3992 3993 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3994 allocated for every row of the local diagonal submatrix, and o_nz 3995 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3996 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3997 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3998 In this case, the values of d_nz,o_nz are: 3999 .vb 4000 proc0 : d_nz = 2, o_nz = 2 4001 proc1 : d_nz = 3, o_nz = 2 4002 proc2 : d_nz = 1, o_nz = 4 4003 .ve 4004 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4005 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4006 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4007 34 values. 4008 4009 When the d_nnz, o_nnz parameters are specified, the storage is specified 4010 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4011 In the above case the values for d_nnz,o_nnz are: 4012 .vb 4013 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4014 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4015 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4016 .ve 4017 Here the space allocated is the sum of all the above values, i.e. 34, and 4018 hence the preallocation is exact.
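   For the example above, each process would therefore make the same call with its own
   local values (a minimal sketch; only m, n, d_nnz and o_nnz differ per process):
.vb
     MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve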
4019 4020 Level: intermediate 4021 4022 .keywords: matrix, aij, compressed row, sparse, parallel 4023 4024 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4025 MPIAIJ, MatCreateMPIAIJWithArrays() 4026 @*/ 4027 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4028 { 4029 PetscErrorCode ierr; 4030 PetscMPIInt size; 4031 4032 PetscFunctionBegin; 4033 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4034 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4035 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4036 if (size > 1) { 4037 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4038 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4039 } else { 4040 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4041 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4042 } 4043 PetscFunctionReturn(0); 4044 } 4045 4046 #undef __FUNCT__ 4047 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4048 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4049 { 4050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4051 4052 PetscFunctionBegin; 4053 *Ad = a->A; 4054 *Ao = a->B; 4055 *colmap = a->garray; 4056 PetscFunctionReturn(0); 4057 } 4058 4059 #undef __FUNCT__ 4060 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4061 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4062 { 4063 PetscErrorCode ierr; 4064 PetscInt i; 4065 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4066 4067 PetscFunctionBegin; 4068 if (coloring->ctype == IS_COLORING_GLOBAL) { 4069 ISColoringValue *allcolors,*colors; 4070 ISColoring ocoloring; 4071 4072 /* set coloring for diagonal portion */ 4073 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4074 4075 /* set coloring for off-diagonal portion */ 4076 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4077 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4078 for (i=0; i<a->B->cmap->n; i++) { 4079 colors[i] = allcolors[a->garray[i]]; 4080 } 4081 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4082 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4083 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4084 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4085 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4086 ISColoringValue *colors; 4087 PetscInt *larray; 4088 ISColoring ocoloring; 4089 4090 /* set coloring for diagonal portion */ 4091 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4092 for (i=0; i<a->A->cmap->n; i++) { 4093 larray[i] = i + A->cmap->rstart; 4094 } 4095 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4096 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4097 for (i=0; i<a->A->cmap->n; i++) { 4098 colors[i] = coloring->colors[larray[i]]; 4099 } 4100 ierr = PetscFree(larray);CHKERRQ(ierr); 4101 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4102 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4103 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4104 4105 /* set coloring for off-diagonal portion */ 4106 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4107 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4108 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4109 for (i=0; i<a->B->cmap->n; i++) { 4110 colors[i] = coloring->colors[larray[i]]; 4111 } 4112 ierr = PetscFree(larray);CHKERRQ(ierr); 4113 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4114 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4115 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4116 } else { 4117 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4118 } 4119 4120 PetscFunctionReturn(0); 4121 } 4122 4123 #if defined(PETSC_HAVE_ADIC) 4124 #undef __FUNCT__ 4125 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4126 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4127 { 4128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4129 PetscErrorCode ierr; 4130 4131 PetscFunctionBegin; 4132 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4133 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4134 PetscFunctionReturn(0); 4135 } 4136 #endif 4137 4138 #undef __FUNCT__ 4139 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4140 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4141 { 4142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4143 PetscErrorCode ierr; 4144 4145 PetscFunctionBegin; 4146 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4147 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4148 PetscFunctionReturn(0); 4149 } 4150 4151 #undef __FUNCT__ 4152 #define __FUNCT__ "MatMerge" 4153 /*@ 4154 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4155 matrices from each processor 4156 4157 Collective on MPI_Comm 4158 4159 Input Parameters: 4160 + comm - the communicators the parallel matrix will live on 4161 . inmat - the input sequential matrices 4162 . n - number of local columns (or PETSC_DECIDE) 4163 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4164 4165 Output Parameter: 4166 . outmat - the parallel matrix generated 4167 4168 Level: advanced 4169 4170 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
4171 4172 @*/ 4173 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4174 { 4175 PetscErrorCode ierr; 4176 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4177 PetscInt *indx; 4178 PetscScalar *values; 4179 4180 PetscFunctionBegin; 4181 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4182 if (scall == MAT_INITIAL_MATRIX){ 4183 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4184 if (n == PETSC_DECIDE){ 4185 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4186 } 4187 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4188 rstart -= m; 4189 4190 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4191 for (i=0;i<m;i++) { 4192 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4193 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4194 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4195 } 4196 /* This routine will ONLY return MPIAIJ type matrix */ 4197 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4198 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4199 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4200 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4201 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4202 4203 } else if (scall == MAT_REUSE_MATRIX){ 4204 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4205 } else { 4206 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4207 } 4208 4209 for (i=0;i<m;i++) { 4210 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4211 Ii = i + rstart; 4212 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4213 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4214 } 4215 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4216 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4217 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4218 4219 PetscFunctionReturn(0); 4220 } 4221 4222 #undef __FUNCT__ 4223 #define __FUNCT__ "MatFileSplit" 4224 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4225 { 4226 PetscErrorCode ierr; 4227 PetscMPIInt rank; 4228 PetscInt m,N,i,rstart,nnz; 4229 size_t len; 4230 const PetscInt *indx; 4231 PetscViewer out; 4232 char *name; 4233 Mat B; 4234 const PetscScalar *values; 4235 4236 PetscFunctionBegin; 4237 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4238 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4239 /* Should this be the type of the diagonal block of A? 
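   The symbolic and numeric phases are separate routines; a minimal sketch of the intended
   usage (comm, seqmat, m and n are whatever the caller provides, C is the resulting parallel matrix):
.vb
     Mat C;
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);
     MatMerge_SeqsToMPINumeric(seqmat,C);
.ve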
*/ 4240 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4241 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4242 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4243 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4244 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4245 for (i=0;i<m;i++) { 4246 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4247 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4248 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4249 } 4250 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4251 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4252 4253 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4254 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4255 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4256 sprintf(name,"%s.%d",outfile,rank); 4257 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4258 ierr = PetscFree(name); 4259 ierr = MatView(B,out);CHKERRQ(ierr); 4260 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4261 ierr = MatDestroy(B);CHKERRQ(ierr); 4262 PetscFunctionReturn(0); 4263 } 4264 4265 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4266 #undef __FUNCT__ 4267 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4268 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4269 { 4270 PetscErrorCode ierr; 4271 Mat_Merge_SeqsToMPI *merge; 4272 PetscContainer container; 4273 4274 PetscFunctionBegin; 4275 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4276 if (container) { 4277 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4278 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4279 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4280 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4281 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4282 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4283 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4284 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4285 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4286 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4287 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4288 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4289 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4290 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4291 4292 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4293 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4294 } 4295 ierr = PetscFree(merge);CHKERRQ(ierr); 4296 4297 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4298 PetscFunctionReturn(0); 4299 } 4300 4301 #include "../src/mat/utils/freespace.h" 4302 #include "petscbt.h" 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4306 /*@C 4307 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4308 matrices from each processor 4309 4310 Collective on MPI_Comm 4311 4312 Input Parameters: 4313 + comm - the communicators the parallel matrix will live on 4314 . seqmat - the input sequential matrices 4315 . m - number of local rows (or PETSC_DECIDE) 4316 . n - number of local columns (or PETSC_DECIDE) 4317 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4318 4319 Output Parameter: 4320 . mpimat - the parallel matrix generated 4321 4322 Level: advanced 4323 4324 Notes: 4325 The dimensions of the sequential matrix in each processor MUST be the same. 
4326 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4327 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4328 @*/ 4329 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4330 { 4331 PetscErrorCode ierr; 4332 MPI_Comm comm=((PetscObject)mpimat)->comm; 4333 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4334 PetscMPIInt size,rank,taga,*len_s; 4335 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4336 PetscInt proc,m; 4337 PetscInt **buf_ri,**buf_rj; 4338 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4339 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4340 MPI_Request *s_waits,*r_waits; 4341 MPI_Status *status; 4342 MatScalar *aa=a->a; 4343 MatScalar **abuf_r,*ba_i; 4344 Mat_Merge_SeqsToMPI *merge; 4345 PetscContainer container; 4346 4347 PetscFunctionBegin; 4348 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4349 4350 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4351 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4352 4353 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4354 if (container) { 4355 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4356 } 4357 bi = merge->bi; 4358 bj = merge->bj; 4359 buf_ri = merge->buf_ri; 4360 buf_rj = merge->buf_rj; 4361 4362 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4363 owners = merge->rowmap->range; 4364 len_s = merge->len_s; 4365 4366 /* send and recv matrix values */ 4367 /*-----------------------------*/ 4368 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4369 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4370 4371 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4372 for (proc=0,k=0; proc<size; proc++){ 4373 if (!len_s[proc]) continue; 4374 i = owners[proc]; 4375 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4376 k++; 4377 } 4378 4379 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4380 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4381 ierr = PetscFree(status);CHKERRQ(ierr); 4382 4383 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4384 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4385 4386 /* insert mat values of mpimat */ 4387 /*----------------------------*/ 4388 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4389 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4390 4391 for (k=0; k<merge->nrecv; k++){ 4392 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4393 nrows = *(buf_ri_k[k]); 4394 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4395 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4396 } 4397 4398 /* set values of ba */ 4399 m = merge->rowmap->n; 4400 for (i=0; i<m; i++) { 4401 arow = owners[rank] + i; 4402 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4403 bnzi = bi[i+1] - bi[i]; 4404 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4405 4406 /* add local non-zero vals of this proc's seqmat into ba */ 4407 anzi = ai[arow+1] - ai[arow]; 4408 aj = a->j + ai[arow]; 4409 aa = a->a + ai[arow]; 4410 nextaj = 0; 4411 for (j=0; nextaj<anzi; j++){ 4412 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4413 ba_i[j] += aa[nextaj++]; 4414 } 4415 } 4416 4417 /* add received vals into ba */ 4418 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4419 /* i-th row */ 4420 if (i == *nextrow[k]) { 4421 anzi = *(nextai[k]+1) - *nextai[k]; 4422 aj = buf_rj[k] + *(nextai[k]); 4423 aa = abuf_r[k] + *(nextai[k]); 4424 nextaj = 0; 4425 for (j=0; nextaj<anzi; j++){ 4426 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4427 ba_i[j] += aa[nextaj++]; 4428 } 4429 } 4430 nextrow[k]++; nextai[k]++; 4431 } 4432 } 4433 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4434 } 4435 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4436 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4437 4438 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4439 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4440 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4441 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4442 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4443 PetscFunctionReturn(0); 4444 } 4445 4446 #undef __FUNCT__ 4447 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4448 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4449 { 4450 PetscErrorCode ierr; 4451 Mat B_mpi; 4452 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4453 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4454 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4455 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4456 PetscInt len,proc,*dnz,*onz; 4457 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4458 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4459 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4460 MPI_Status *status; 4461 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4462 PetscBT lnkbt; 4463 Mat_Merge_SeqsToMPI *merge; 4464 PetscContainer container; 4465 4466 PetscFunctionBegin; 4467 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4468 4469 /* make sure it is a PETSc comm */ 4470 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4471 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4472 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4473 4474 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4475 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4476 4477 /* determine row ownership */ 4478 /*---------------------------------------------------------*/ 4479 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4480 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4481 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4482 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4483 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4484 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4485 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4486 4487 m = merge->rowmap->n; 4488 M = merge->rowmap->N; 4489 owners = merge->rowmap->range; 4490 4491 /* determine the number of messages to send, their lengths */ 4492 /*---------------------------------------------------------*/ 4493 len_s = merge->len_s; 4494 4495 len = 0; /* length of buf_si[] */ 4496 merge->nsend = 0; 4497 for (proc=0; proc<size; proc++){ 4498 len_si[proc] = 0; 4499 if (proc == rank){ 4500 len_s[proc] = 0; 4501 } else { 4502 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4503 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4504 } 4505 if (len_s[proc]) { 4506 merge->nsend++; 4507 nrows = 0; 4508 for (i=owners[proc]; i<owners[proc+1]; i++){ 4509 if (ai[i+1] > ai[i]) nrows++; 4510 } 4511 len_si[proc] = 2*(nrows+1); 4512 len += len_si[proc]; 4513 } 4514 } 4515 4516 /* determine the number and length of messages to receive for ij-structure */ 4517 /*-------------------------------------------------------------------------*/ 4518 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4519 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4520 4521 /* post the Irecv of j-structure */ 4522 /*-------------------------------*/ 4523 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4524 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4525 4526 /* post the Isend of j-structure */ 4527 /*--------------------------------*/ 4528 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4529 4530 for (proc=0, k=0; proc<size; proc++){ 4531 if (!len_s[proc]) continue; 4532 i = owners[proc]; 4533 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4534 k++; 4535 } 4536 4537 /* receives and sends of j-structure are complete */ 4538 /*------------------------------------------------*/ 4539 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4540 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4541 4542 /* send and recv i-structure */ 4543 /*---------------------------*/ 4544 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4545 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4546 4547 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4548 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4549 for (proc=0,k=0; proc<size; proc++){ 4550 if (!len_s[proc]) continue; 4551 /* form outgoing message for i-structure: 4552 buf_si[0]: nrows to be sent 4553 [1:nrows]: row index (global) 4554 [nrows+1:2*nrows+1]: i-structure index 4555 */ 4556 /*-------------------------------------------*/ 4557 nrows = len_si[proc]/2 - 1; 4558 buf_si_i = buf_si + nrows+1; 4559 buf_si[0] = nrows; 4560 buf_si_i[0] = 0; 4561 nrows = 0; 4562 for (i=owners[proc]; i<owners[proc+1]; i++){ 4563 anzi = ai[i+1] - ai[i]; 4564 if (anzi) { 4565 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4566 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4567 nrows++; 4568 } 4569 } 4570 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4571 k++; 4572 buf_si += len_si[proc]; 4573 } 4574 4575 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4576 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4577 4578 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4579 for (i=0; i<merge->nrecv; i++){ 4580 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4581 } 4582 4583 ierr = PetscFree(len_si);CHKERRQ(ierr); 4584 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4585 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4586 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4587 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4588 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4589 ierr = PetscFree(status);CHKERRQ(ierr); 4590 4591 /* compute a local seq matrix in each processor */ 4592 /*----------------------------------------------*/ 4593 /* allocate bi array and free space for accumulating nonzero column info */ 4594 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4595 bi[0] = 0; 4596 4597 /* create and initialize a linked list */ 4598 nlnk = N+1; 4599 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4600 4601 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4602 len = 0; 4603 len = ai[owners[rank+1]] - ai[owners[rank]]; 4604 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4605 current_space = free_space; 4606 4607 /* determine symbolic info for each local row */ 4608 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4609 4610 for (k=0; k<merge->nrecv; k++){ 4611 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4612 nrows = *buf_ri_k[k]; 4613 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4614 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4615 } 4616 4617 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4618 len = 0; 4619 for (i=0;i<m;i++) { 4620 bnzi = 0; 4621 /* add local non-zero cols of this proc's seqmat into lnk */ 4622 arow = owners[rank] + i; 4623 anzi = ai[arow+1] - ai[arow]; 4624 aj = a->j + ai[arow]; 4625 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4626 bnzi += nlnk; 4627 /* add received col data into lnk */ 4628 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4629 if (i == *nextrow[k]) { /* i-th row */ 4630 anzi = *(nextai[k]+1) - *nextai[k]; 4631 aj = buf_rj[k] + *nextai[k]; 4632 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4633 bnzi += nlnk; 4634 nextrow[k]++; nextai[k]++; 4635 } 4636 } 4637 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4638 4639 /* if free space is not available, make more free space */ 4640 if (current_space->local_remaining<bnzi) { 4641 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4642 nspacedouble++; 4643 } 4644 /* copy data into free space, then initialize lnk */ 4645 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4646 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4647 4648 current_space->array += bnzi; 4649 current_space->local_used += bnzi; 4650 current_space->local_remaining -= bnzi; 4651 4652 bi[i+1] = bi[i] + bnzi; 4653 } 4654 4655 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4656 4657 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4658 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4659 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4660 4661 /* create symbolic parallel matrix B_mpi */ 4662 /*---------------------------------------*/ 4663 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4664 if (n==PETSC_DECIDE) { 4665 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4666 } else { 4667 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4668 } 4669 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4670 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4671 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4672 4673 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4674 B_mpi->assembled = PETSC_FALSE; 4675 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4676 merge->bi = bi; 4677 merge->bj = bj; 4678 merge->buf_ri = buf_ri; 4679 merge->buf_rj = buf_rj; 4680 merge->coi = PETSC_NULL; 4681 merge->coj = PETSC_NULL; 4682 merge->owners_co = PETSC_NULL; 4683 4684 /* attach the supporting struct to B_mpi for reuse */ 4685 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4686 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4687 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4688 *mpimat = B_mpi; 4689 4690 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4691 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4692 PetscFunctionReturn(0); 4693 } 4694 4695 #undef __FUNCT__ 4696 #define __FUNCT__ "MatMerge_SeqsToMPI" 4697 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4698 { 4699 PetscErrorCode ierr; 4700 4701 PetscFunctionBegin; 4702 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4703 if (scall == MAT_INITIAL_MATRIX){ 4704 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4705 } 4706 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4707 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 #undef __FUNCT__ 4712 #define __FUNCT__ "MatGetLocalMat" 4713 /*@ 4714 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4715 4716 Not Collective 4717 4718 Input Parameters: 4719 + A - the matrix 4720 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4721 4722 Output Parameter: 4723 . 
A_loc - the local sequential matrix generated 4724 4725 Level: developer 4726 4727 @*/ 4728 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4729 { 4730 PetscErrorCode ierr; 4731 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4732 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4733 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4734 MatScalar *aa=a->a,*ba=b->a,*cam; 4735 PetscScalar *ca; 4736 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4737 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4738 4739 PetscFunctionBegin; 4740 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4741 if (scall == MAT_INITIAL_MATRIX){ 4742 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4743 ci[0] = 0; 4744 for (i=0; i<am; i++){ 4745 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4746 } 4747 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4748 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4749 k = 0; 4750 for (i=0; i<am; i++) { 4751 ncols_o = bi[i+1] - bi[i]; 4752 ncols_d = ai[i+1] - ai[i]; 4753 /* off-diagonal portion of A */ 4754 for (jo=0; jo<ncols_o; jo++) { 4755 col = cmap[*bj]; 4756 if (col >= cstart) break; 4757 cj[k] = col; bj++; 4758 ca[k++] = *ba++; 4759 } 4760 /* diagonal portion of A */ 4761 for (j=0; j<ncols_d; j++) { 4762 cj[k] = cstart + *aj++; 4763 ca[k++] = *aa++; 4764 } 4765 /* off-diagonal portion of A */ 4766 for (j=jo; j<ncols_o; j++) { 4767 cj[k] = cmap[*bj++]; 4768 ca[k++] = *ba++; 4769 } 4770 } 4771 /* put together the new matrix */ 4772 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4773 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4774 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4775 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4776 mat->free_a = PETSC_TRUE; 4777 mat->free_ij = PETSC_TRUE; 4778 mat->nonew = 0; 4779 } else if (scall == MAT_REUSE_MATRIX){ 4780 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4781 ci = mat->i; cj = mat->j; cam = mat->a; 4782 for (i=0; i<am; i++) { 4783 /* off-diagonal portion of A */ 4784 ncols_o = bi[i+1] - bi[i]; 4785 for (jo=0; jo<ncols_o; jo++) { 4786 col = cmap[*bj]; 4787 if (col >= cstart) break; 4788 *cam++ = *ba++; bj++; 4789 } 4790 /* diagonal portion of A */ 4791 ncols_d = ai[i+1] - ai[i]; 4792 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4793 /* off-diagonal portion of A */ 4794 for (j=jo; j<ncols_o; j++) { 4795 *cam++ = *ba++; bj++; 4796 } 4797 } 4798 } else { 4799 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4800 } 4801 4802 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4803 PetscFunctionReturn(0); 4804 } 4805 4806 #undef __FUNCT__ 4807 #define __FUNCT__ "MatGetLocalMatCondensed" 4808 /*@C 4809 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4810 4811 Not Collective 4812 4813 Input Parameters: 4814 + A - the matrix 4815 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4816 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4817 4818 Output Parameter: 4819 . 
A_loc - the local sequential matrix generated 4820 4821 Level: developer 4822 4823 @*/ 4824 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4825 { 4826 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4827 PetscErrorCode ierr; 4828 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4829 IS isrowa,iscola; 4830 Mat *aloc; 4831 4832 PetscFunctionBegin; 4833 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4834 if (!row){ 4835 start = A->rmap->rstart; end = A->rmap->rend; 4836 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4837 } else { 4838 isrowa = *row; 4839 } 4840 if (!col){ 4841 start = A->cmap->rstart; 4842 cmap = a->garray; 4843 nzA = a->A->cmap->n; 4844 nzB = a->B->cmap->n; 4845 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4846 ncols = 0; 4847 for (i=0; i<nzB; i++) { 4848 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4849 else break; 4850 } 4851 imark = i; 4852 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4853 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4854 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4855 } else { 4856 iscola = *col; 4857 } 4858 if (scall != MAT_INITIAL_MATRIX){ 4859 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4860 aloc[0] = *A_loc; 4861 } 4862 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4863 *A_loc = aloc[0]; 4864 ierr = PetscFree(aloc);CHKERRQ(ierr); 4865 if (!row){ 4866 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4867 } 4868 if (!col){ 4869 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4870 } 4871 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 #undef __FUNCT__ 4876 #define __FUNCT__ "MatGetBrowsOfAcols" 4877 /*@C 4878 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4879 4880 Collective on Mat 4881 4882 Input Parameters: 4883 + A,B - the matrices in mpiaij format 4884 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4885 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4886 4887 Output Parameter: 4888 + rowb, colb - index sets of rows and columns of B to extract 4889 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4890 - B_seq - the sequential matrix generated 4891 4892 Level: developer 4893 4894 @*/ 4895 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4896 { 4897 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4898 PetscErrorCode ierr; 4899 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4900 IS isrowb,iscolb; 4901 Mat *bseq; 4902 4903 PetscFunctionBegin; 4904 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4905 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4906 } 4907 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4908 4909 if (scall == MAT_INITIAL_MATRIX){ 4910 start = A->cmap->rstart; 4911 cmap = a->garray; 4912 nzA = a->A->cmap->n; 4913 nzB = a->B->cmap->n; 4914 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4915 ncols = 0; 4916 for (i=0; i<nzB; i++) { /* row < local row index */ 4917 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4918 else break; 4919 } 4920 imark = i; 4921 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4922 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4923 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4924 *brstart = imark; 4925 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4926 } else { 4927 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4928 isrowb = *rowb; iscolb = *colb; 4929 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4930 bseq[0] = *B_seq; 4931 } 4932 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4933 *B_seq = bseq[0]; 4934 ierr = PetscFree(bseq);CHKERRQ(ierr); 4935 if (!rowb){ 4936 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4937 } else { 4938 *rowb = isrowb; 4939 } 4940 if (!colb){ 4941 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4942 } else { 4943 *colb = iscolb; 4944 } 4945 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4946 PetscFunctionReturn(0); 4947 } 4948 4949 #undef __FUNCT__ 4950 #define __FUNCT__ "MatGetBrowsOfAoCols" 4951 /*@C 4952 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4953 of the OFF-DIAGONAL portion of local A 4954 4955 Collective on Mat 4956 4957 Input Parameters: 4958 + A,B - the matrices in mpiaij format 4959 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4960 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4961 . 
startsj_r - similar to startsj for receives 4962 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4963 4964 Output Parameter: 4965 + B_oth - the sequential matrix generated 4966 4967 Level: developer 4968 4969 @*/ 4970 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4971 { 4972 VecScatter_MPI_General *gen_to,*gen_from; 4973 PetscErrorCode ierr; 4974 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4975 Mat_SeqAIJ *b_oth; 4976 VecScatter ctx=a->Mvctx; 4977 MPI_Comm comm=((PetscObject)ctx)->comm; 4978 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4979 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4980 PetscScalar *rvalues,*svalues; 4981 MatScalar *b_otha,*bufa,*bufA; 4982 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4983 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4984 MPI_Status *sstatus,rstatus; 4985 PetscMPIInt jj; 4986 PetscInt *cols,sbs,rbs; 4987 PetscScalar *vals; 4988 4989 PetscFunctionBegin; 4990 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4991 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4992 } 4993 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4994 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4995 4996 gen_to = (VecScatter_MPI_General*)ctx->todata; 4997 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4998 rvalues = gen_from->values; /* holds the length of receiving row */ 4999 svalues = gen_to->values; /* holds the length of sending row */ 5000 nrecvs = gen_from->n; 5001 nsends = gen_to->n; 5002 5003 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5004 srow = gen_to->indices; /* local row index to be sent */ 5005 sstarts = gen_to->starts; 5006 sprocs = gen_to->procs; 5007 sstatus = gen_to->sstatus; 5008 sbs = gen_to->bs; 5009 rstarts = gen_from->starts; 5010 rprocs = gen_from->procs; 5011 rbs = gen_from->bs; 5012 5013 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5014 if (scall == MAT_INITIAL_MATRIX){ 5015 /* i-array */ 5016 /*---------*/ 5017 /* post receives */ 5018 for (i=0; i<nrecvs; i++){ 5019 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5020 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5021 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5022 } 5023 5024 /* pack the outgoing message */ 5025 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5026 sstartsj[0] = 0; rstartsj[0] = 0; 5027 len = 0; /* total length of j or a array to be sent */ 5028 k = 0; 5029 for (i=0; i<nsends; i++){ 5030 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5031 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5032 for (j=0; j<nrows; j++) { 5033 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5034 for (l=0; l<sbs; l++){ 5035 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5036 rowlen[j*sbs+l] = ncols; 5037 len += ncols; 5038 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5039 } 5040 k++; 5041 } 5042 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5043 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 5044 } 5045 /* recvs and sends of i-array are completed */ 5046 i = nrecvs; 5047 while (i--) { 5048 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5049 } 5050 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5051 5052 /* allocate buffers for sending j and a arrays */ 5053 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5054 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5055 5056 /* create i-array of B_oth */ 5057 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5058 b_othi[0] = 0; 5059 len = 0; /* total length of j or a array to be received */ 5060 k = 0; 5061 for (i=0; i<nrecvs; i++){ 5062 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5063 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5064 for (j=0; j<nrows; j++) { 5065 b_othi[k+1] = b_othi[k] + rowlen[j]; 5066 len += rowlen[j]; k++; 5067 } 5068 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5069 } 5070 5071 /* allocate space for j and a arrrays of B_oth */ 5072 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5073 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5074 5075 /* j-array */ 5076 /*---------*/ 5077 /* post receives of j-array */ 5078 for (i=0; i<nrecvs; i++){ 5079 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5080 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5081 } 5082 5083 /* pack the outgoing message j-array */ 5084 k = 0; 5085 for (i=0; i<nsends; i++){ 5086 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5087 bufJ = bufj+sstartsj[i]; 5088 for (j=0; j<nrows; j++) { 5089 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5090 for (ll=0; ll<sbs; ll++){ 5091 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5092 for (l=0; l<ncols; l++){ 5093 *bufJ++ = cols[l]; 5094 } 5095 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5096 } 5097 } 5098 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5099 } 5100 5101 /* recvs and sends of j-array are completed */ 5102 i = nrecvs; 5103 while (i--) { 5104 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5105 } 5106 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5107 } else if (scall == MAT_REUSE_MATRIX){ 5108 sstartsj = *startsj; 5109 rstartsj = *startsj_r; 5110 bufa = *bufa_ptr; 5111 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5112 b_otha = b_oth->a; 5113 } else { 5114 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5115 } 5116 5117 /* a-array */ 5118 /*---------*/ 5119 /* post receives of a-array */ 5120 for (i=0; i<nrecvs; i++){ 5121 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5122 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5123 } 5124 5125 /* pack the outgoing message a-array */ 5126 k = 0; 5127 for (i=0; i<nsends; i++){ 5128 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5129 bufA = bufa+sstartsj[i]; 5130 for (j=0; j<nrows; j++) { 5131 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5132 for (ll=0; ll<sbs; ll++){ 5133 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5134 for (l=0; l<ncols; l++){ 5135 *bufA++ = vals[l]; 5136 } 5137 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5138 } 5139 } 5140 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5141 } 5142 /* recvs and sends of a-array are completed */ 5143 i = nrecvs; 5144 while (i--) { 5145 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5146 } 5147 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5148 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5149 5150 if (scall == MAT_INITIAL_MATRIX){ 5151 /* put together the new matrix */ 5152 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5153 5154 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5155 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5156 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5157 b_oth->free_a = PETSC_TRUE; 5158 b_oth->free_ij = PETSC_TRUE; 5159 b_oth->nonew = 0; 5160 5161 ierr = PetscFree(bufj);CHKERRQ(ierr); 5162 if (!startsj || !bufa_ptr){ 5163 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5164 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5165 } else { 5166 *startsj = sstartsj; 5167 *startsj_r = rstartsj; 5168 *bufa_ptr = bufa; 5169 } 5170 } 5171 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5172 PetscFunctionReturn(0); 5173 } 5174 5175 #undef __FUNCT__ 5176 #define __FUNCT__ "MatGetCommunicationStructs" 5177 /*@C 5178 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5179 5180 Not Collective 5181 5182 Input Parameters: 5183 . A - The matrix in mpiaij format 5184 5185 Output Parameter: 5186 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5187 . 
colmap - A map from global column index to local index into lvec 5188 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5189 5190 Level: developer 5191 5192 @*/ 5193 #if defined (PETSC_USE_CTABLE) 5194 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5195 #else 5196 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5197 #endif 5198 { 5199 Mat_MPIAIJ *a; 5200 5201 PetscFunctionBegin; 5202 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5203 PetscValidPointer(lvec, 2); 5204 PetscValidPointer(colmap, 3); 5205 PetscValidPointer(multScatter, 4); 5206 a = (Mat_MPIAIJ *) A->data; 5207 if (lvec) *lvec = a->lvec; 5208 if (colmap) *colmap = a->colmap; 5209 if (multScatter) *multScatter = a->Mvctx; 5210 PetscFunctionReturn(0); 5211 } 5212 5213 EXTERN_C_BEGIN 5214 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5215 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5216 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5217 EXTERN_C_END 5218 5219 #undef __FUNCT__ 5220 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5221 /* 5222 Computes (B'*A')' since computing B*A directly is untenable 5223 5224 n p p 5225 ( ) ( ) ( ) 5226 m ( A ) * n ( B ) = m ( C ) 5227 ( ) ( ) ( ) 5228 5229 */ 5230 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5231 { 5232 PetscErrorCode ierr; 5233 Mat At,Bt,Ct; 5234 5235 PetscFunctionBegin; 5236 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5237 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5238 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5239 ierr = MatDestroy(At);CHKERRQ(ierr); 5240 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5241 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5242 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5243 PetscFunctionReturn(0); 5244 } 5245 5246 #undef __FUNCT__ 5247 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5248 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5249 { 5250 PetscErrorCode ierr; 5251 PetscInt m=A->rmap->n,n=B->cmap->n; 5252 Mat Cmat; 5253 5254 PetscFunctionBegin; 5255 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5256 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5257 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5258 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5259 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5260 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5261 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5262 *C = Cmat; 5263 PetscFunctionReturn(0); 5264 } 5265 5266 /* ----------------------------------------------------------------*/ 5267 #undef __FUNCT__ 5268 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5269 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5270 { 5271 PetscErrorCode ierr; 5272 5273 PetscFunctionBegin; 5274 if (scall == MAT_INITIAL_MATRIX){ 5275 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5276 } 5277 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5278 PetscFunctionReturn(0); 5279 } 
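/*
   Illustrative usage sketch (not part of the original source): the three routines above are
   normally reached through the public MatMatMult() interface once A is MATMPIDENSE and B is
   MATMPIAIJ. The setup assumed below (A and B already assembled, with A->cmap->n == B->rmap->n
   as the symbolic routine requires) is hypothetical.

      Mat A,B,C;
      ...
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ...
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   The first call performs both the symbolic and numeric stages; the second reuses the existing C
   and only redoes the numeric product, which is computed through the transpose trick documented
   above because forming the dense-times-sparse product directly is untenable.
*/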
5280 5281 EXTERN_C_BEGIN 5282 #if defined(PETSC_HAVE_MUMPS) 5283 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5284 #endif 5285 #if defined(PETSC_HAVE_PASTIX) 5286 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5287 #endif 5288 #if defined(PETSC_HAVE_SUPERLU_DIST) 5289 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5290 #endif 5291 #if defined(PETSC_HAVE_SPOOLES) 5292 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5293 #endif 5294 EXTERN_C_END 5295 5296 /*MC 5297 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5298 5299 Options Database Keys: 5300 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5301 5302 Level: beginner 5303 5304 .seealso: MatCreateMPIAIJ() 5305 M*/ 5306 5307 EXTERN_C_BEGIN 5308 #undef __FUNCT__ 5309 #define __FUNCT__ "MatCreate_MPIAIJ" 5310 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5311 { 5312 Mat_MPIAIJ *b; 5313 PetscErrorCode ierr; 5314 PetscMPIInt size; 5315 5316 PetscFunctionBegin; 5317 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5318 5319 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5320 B->data = (void*)b; 5321 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5322 B->rmap->bs = 1; 5323 B->assembled = PETSC_FALSE; 5324 B->mapping = 0; 5325 5326 B->insertmode = NOT_SET_VALUES; 5327 b->size = size; 5328 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5329 5330 /* build cache for off array entries formed */ 5331 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5332 b->donotstash = PETSC_FALSE; 5333 b->colmap = 0; 5334 b->garray = 0; 5335 b->roworiented = PETSC_TRUE; 5336 5337 /* stuff used for matrix vector multiply */ 5338 b->lvec = PETSC_NULL; 5339 b->Mvctx = PETSC_NULL; 5340 5341 /* stuff for MatGetRow() */ 5342 b->rowindices = 0; 5343 b->rowvalues = 0; 5344 b->getrowactive = PETSC_FALSE; 5345 5346 #if defined(PETSC_HAVE_SPOOLES) 5347 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5348 "MatGetFactor_mpiaij_spooles", 5349 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5350 #endif 5351 #if defined(PETSC_HAVE_MUMPS) 5352 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5353 "MatGetFactor_aij_mumps", 5354 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5355 #endif 5356 #if defined(PETSC_HAVE_PASTIX) 5357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5358 "MatGetFactor_mpiaij_pastix", 5359 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5360 #endif 5361 #if defined(PETSC_HAVE_SUPERLU_DIST) 5362 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5363 "MatGetFactor_mpiaij_superlu_dist", 5364 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5365 #endif 5366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5367 "MatStoreValues_MPIAIJ", 5368 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5370 "MatRetrieveValues_MPIAIJ", 5371 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5373 "MatGetDiagonalBlock_MPIAIJ", 5374 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5376 "MatIsTranspose_MPIAIJ", 
5377 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5379 "MatMPIAIJSetPreallocation_MPIAIJ", 5380 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5381 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5382 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5383 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5384 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5385 "MatDiagonalScaleLocal_MPIAIJ", 5386 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5387 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5388 "MatConvert_MPIAIJ_MPIAIJPERM", 5389 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5390 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5391 "MatConvert_MPIAIJ_MPIAIJCRL", 5392 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5393 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5394 "MatConvert_MPIAIJ_MPISBAIJ", 5395 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5396 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5397 "MatMatMult_MPIDense_MPIAIJ", 5398 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5400 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5401 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5402 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5403 "MatMatMultNumeric_MPIDense_MPIAIJ", 5404 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5405 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5406 PetscFunctionReturn(0); 5407 } 5408 EXTERN_C_END 5409 5410 #undef __FUNCT__ 5411 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5412 /*@ 5413 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5414 and "off-diagonal" part of the matrix in CSR format. 5415 5416 Collective on MPI_Comm 5417 5418 Input Parameters: 5419 + comm - MPI communicator 5420 . m - number of local rows (Cannot be PETSC_DECIDE) 5421 . n - This value should be the same as the local size used in creating the 5422 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5423 calculated if N is given) For square matrices n is almost always m. 5424 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5425 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5426 . i - row indices for "diagonal" portion of matrix 5427 . j - column indices 5428 . a - matrix values 5429 . oi - row indices for "off-diagonal" portion of matrix 5430 . oj - column indices 5431 - oa - matrix values 5432 5433 Output Parameter: 5434 . mat - the matrix 5435 5436 Level: advanced 5437 5438 Notes: 5439 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5440 5441 The i and j indices are 0 based 5442 5443 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5444 5445 This sets local rows and cannot be used to set off-processor values. 5446 5447 You cannot later use MatSetValues() to change values in this matrix. 
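
   Example (an illustrative sketch added for this documentation, not taken from the original
   source): consider the global 2 by 4 matrix

         row 0:  1  2  0  3
         row 1:  0  4  5  6

   distributed over two processes, each owning one row and two columns. Following the way the
   arrays are handed to MatCreateSeqAIJWithArrays() below, the "diagonal" block is described with
   local column indices and the "off-diagonal" block with global column indices. Rank 0 would pass
         i  = {0,2};  j  = {0,1};  a  = {1,2};
         oi = {0,1};  oj = {3};    oa = {3};
   and rank 1 would pass
         i  = {0,2};  j  = {0,1};  a  = {5,6};
         oi = {0,1};  oj = {1};    oa = {4};
   followed on every rank by
         ierr = MatCreateMPIAIJWithSplitArrays(comm,1,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);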
5448 5449 .keywords: matrix, aij, compressed row, sparse, parallel 5450 5451 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5452 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5453 @*/ 5454 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5455 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5456 { 5457 PetscErrorCode ierr; 5458 Mat_MPIAIJ *maij; 5459 5460 PetscFunctionBegin; 5461 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5462 if (i[0]) { 5463 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5464 } 5465 if (oi[0]) { 5466 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5467 } 5468 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5469 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5470 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5471 maij = (Mat_MPIAIJ*) (*mat)->data; 5472 maij->donotstash = PETSC_TRUE; 5473 (*mat)->preallocated = PETSC_TRUE; 5474 5475 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5476 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5477 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5478 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5479 5480 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5481 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5482 5483 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5484 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5485 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5486 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5487 5488 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5489 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5490 PetscFunctionReturn(0); 5491 } 5492 5493 /* 5494 Special version for direct calls from Fortran 5495 */ 5496 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5497 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5498 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5499 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5500 #endif 5501 5502 /* Change these macros so can be used in void function */ 5503 #undef CHKERRQ 5504 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5505 #undef SETERRQ2 5506 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5507 #undef SETERRQ 5508 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5509 5510 EXTERN_C_BEGIN 5511 #undef __FUNCT__ 5512 #define __FUNCT__ "matsetvaluesmpiaij_" 5513 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5514 { 5515 Mat mat = *mmat; 5516 PetscInt m = *mm, n = *mn; 5517 InsertMode addv = *maddv; 5518 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5519 PetscScalar value; 5520 PetscErrorCode ierr; 5521 5522 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5523 if (mat->insertmode == NOT_SET_VALUES) { 5524 mat->insertmode = addv; 5525 } 5526 #if defined(PETSC_USE_DEBUG) 5527 else if (mat->insertmode != addv) { 5528 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5529 } 5530 #endif 5531 { 5532 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5533 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5534 PetscBool roworiented = aij->roworiented; 5535 5536 /* Some Variables required in the macro */ 5537 Mat A = aij->A; 5538 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5539 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5540 MatScalar *aa = a->a; 5541 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5542 Mat B = aij->B; 5543 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5544 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5545 MatScalar *ba = b->a; 5546 5547 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5548 PetscInt nonew = a->nonew; 5549 MatScalar *ap1,*ap2; 5550 5551 PetscFunctionBegin; 5552 for (i=0; i<m; i++) { 5553 if (im[i] < 0) continue; 5554 #if defined(PETSC_USE_DEBUG) 5555 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5556 #endif 5557 if (im[i] >= rstart && im[i] < rend) { 5558 row = im[i] - rstart; 5559 lastcol1 = -1; 5560 rp1 = aj + ai[row]; 5561 ap1 = aa + ai[row]; 5562 rmax1 = aimax[row]; 5563 nrow1 = ailen[row]; 5564 low1 = 0; 5565 high1 = nrow1; 5566 lastcol2 = -1; 5567 rp2 = bj + bi[row]; 5568 ap2 = ba + bi[row]; 5569 rmax2 = bimax[row]; 5570 nrow2 = bilen[row]; 5571 low2 = 0; 5572 high2 = nrow2; 5573 5574 for (j=0; j<n; j++) { 5575 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5576 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5577 if (in[j] >= cstart && in[j] < cend){ 5578 col = in[j] - cstart; 5579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5580 } else if (in[j] < 0) continue; 5581 #if defined(PETSC_USE_DEBUG) 5582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5583 #endif 5584 else { 5585 if (mat->was_assembled) { 5586 if (!aij->colmap) { 5587 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5588 } 5589 #if defined (PETSC_USE_CTABLE) 5590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5591 col--; 5592 #else 5593 col = aij->colmap[in[j]] - 1; 5594 #endif 5595 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5596 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5597 col = in[j]; 5598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5599 B = aij->B; 5600 b = (Mat_SeqAIJ*)B->data; 5601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5602 rp2 = bj + bi[row]; 5603 ap2 = ba + bi[row]; 5604 rmax2 = bimax[row]; 5605 nrow2 = bilen[row]; 5606 low2 = 0; 5607 high2 = nrow2; 5608 bm = aij->B->rmap->n; 5609 ba = b->a; 5610 } 5611 } else col = in[j]; 5612 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5613 } 5614 } 5615 } else { 5616 if (!aij->donotstash) { 5617 if (roworiented) { 5618 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5619 } else { 5620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5621 } 5622 } 5623 } 5624 }} 5625 PetscFunctionReturnVoid(); 5626 } 5627 EXTERN_C_END 5628 5629
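/*
   Illustrative assembly sketch (not part of the original source): MatSetValues() on a MATMPIAIJ
   matrix behaves like the Fortran fast path above -- entries in locally owned rows go straight
   into the diagonal (A) or off-diagonal (B) sequential blocks, while entries for rows owned by
   other processes are stashed and communicated during assembly. The sizes, preallocation numbers,
   and fill pattern below are hypothetical.

      Mat         A;
      PetscInt    i,rstart,rend,ncols,cols[3];
      PetscScalar vals[3];

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ncols = ...;                          set cols[] (global indices) and vals[] for row i
        ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/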