#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
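
/*
   A minimal usage sketch for MatDistribute_MPIAIJ() (illustrative only; the sequential
   AIJ matrix gseq and the local row count mlocal are assumed to be set up by the caller,
   and gseq need only be valid on process 0):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... change the numerical values of gseq on process 0 ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/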

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but it is fast to access. The map stores
local index + 1, so a value of 0 means the global column does not occur
in the off-diagonal part.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
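
/*
   The two macros above insert a single value into the diagonal (A) or off-diagonal (B)
   SeqAIJ block: a short binary search narrows [low,high) within the row (lastcol caches
   the previous column so that monotonically increasing insertions resume where the last
   one stopped), an existing entry is overwritten or added to, and otherwise
   MatSeqXAIJReallocateAIJ() guarantees space and the later entries of the row are
   shifted up by one to make room for the new nonzero.
*/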

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
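            /* translate the global column index into a local column of the off-diagonal
               block B; colmap stores local index + 1 (see CreateColmap_MPIAIJ_Private()),
               so col becomes negative below when the column is not yet present in B */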
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
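
/*
   Off-process entries set with MatSetValues_MPIAIJ() are collected in mat->stash;
   MatAssemblyBegin_MPIAIJ() below starts scattering them to their owning processes and
   MatAssemblyEnd_MPIAIJ() receives them and applies them with MatSetValues_MPIAIJ()
   before assembling the two sequential blocks.
*/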

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
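  /* the ordering below depends on whether the forward and reverse scatters were merged:
     if not merged, the reverse scatter is started first and overlapped with the local
     product (the remote contributions are only added into yy in VecScatterEnd());
     if merged, the scatter already adds into yy during VecScatterBegin(), so the local
     product must be formed first */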
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
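
/*
   The multiply routines above all use the same splitting: the "diagonal" block a->A acts
   on the locally owned part of x, while a->B acts on the ghost values gathered into
   a->lvec by the a->Mvctx scatter, so that y = A*x is computed as
   y = a->A*x_local + a->B*lvec with the communication overlapped with the local product.
*/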

/*
    This only works correctly for square matrices where the subblock A->A is the
    diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
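
/*
   MatView_MPIAIJ_Binary() below writes the standard PETSc binary matrix format: a four
   entry header (MAT_FILE_CLASSID, global rows, global columns, global number of
   nonzeros), then the number of nonzeros in each row, then the global column index of
   each nonzero, and finally the numerical values, all gathered to and written by
   process 0.
*/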

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}
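
/*
   A minimal sketch of querying the routine above through the public interface
   (illustrative only):

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/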
op,PetscTruth flg) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 switch (op) { 1427 case MAT_NEW_NONZERO_LOCATIONS: 1428 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1429 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1430 case MAT_KEEP_NONZERO_PATTERN: 1431 case MAT_NEW_NONZERO_LOCATION_ERR: 1432 case MAT_USE_INODES: 1433 case MAT_IGNORE_ZERO_ENTRIES: 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1441 break; 1442 case MAT_NEW_DIAGONALS: 1443 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1444 break; 1445 case MAT_IGNORE_OFF_PROC_ENTRIES: 1446 a->donotstash = PETSC_TRUE; 1447 break; 1448 case MAT_SPD: 1449 A->spd_set = PETSC_TRUE; 1450 A->spd = flg; 1451 if (flg) { 1452 A->symmetric = PETSC_TRUE; 1453 A->structurally_symmetric = PETSC_TRUE; 1454 A->symmetric_set = PETSC_TRUE; 1455 A->structurally_symmetric_set = PETSC_TRUE; 1456 } 1457 break; 1458 case MAT_SYMMETRIC: 1459 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1460 break; 1461 case MAT_STRUCTURALLY_SYMMETRIC: 1462 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1463 break; 1464 case MAT_HERMITIAN: 1465 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1466 break; 1467 case MAT_SYMMETRY_ETERNAL: 1468 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1469 break; 1470 default: 1471 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatGetRow_MPIAIJ" 1478 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1479 { 1480 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1481 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1482 PetscErrorCode ierr; 1483 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1484 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1485 PetscInt *cmap,*idx_p; 1486 1487 PetscFunctionBegin; 1488 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1489 mat->getrowactive = PETSC_TRUE; 1490 1491 if (!mat->rowvalues && (idx || v)) { 1492 /* 1493 allocate enough space to hold information from the longest row. 
1494 */ 1495 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1496 PetscInt max = 1,tmp; 1497 for (i=0; i<matin->rmap->n; i++) { 1498 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1499 if (max < tmp) { max = tmp; } 1500 } 1501 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1502 } 1503 1504 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1505 lrow = row - rstart; 1506 1507 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1508 if (!v) {pvA = 0; pvB = 0;} 1509 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1510 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1511 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1512 nztot = nzA + nzB; 1513 1514 cmap = mat->garray; 1515 if (v || idx) { 1516 if (nztot) { 1517 /* Sort by increasing column numbers, assuming A and B already sorted */ 1518 PetscInt imark = -1; 1519 if (v) { 1520 *v = v_p = mat->rowvalues; 1521 for (i=0; i<nzB; i++) { 1522 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1523 else break; 1524 } 1525 imark = i; 1526 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1527 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1528 } 1529 if (idx) { 1530 *idx = idx_p = mat->rowindices; 1531 if (imark > -1) { 1532 for (i=0; i<imark; i++) { 1533 idx_p[i] = cmap[cworkB[i]]; 1534 } 1535 } else { 1536 for (i=0; i<nzB; i++) { 1537 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1538 else break; 1539 } 1540 imark = i; 1541 } 1542 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1543 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1544 } 1545 } else { 1546 if (idx) *idx = 0; 1547 if (v) *v = 0; 1548 } 1549 } 1550 *nz = nztot; 1551 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1552 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1558 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1559 { 1560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1561 1562 PetscFunctionBegin; 1563 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1564 aij->getrowactive = PETSC_FALSE; 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatNorm_MPIAIJ" 1570 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1571 { 1572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1573 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1574 PetscErrorCode ierr; 1575 PetscInt i,j,cstart = mat->cmap->rstart; 1576 PetscReal sum = 0.0; 1577 MatScalar *v; 1578 1579 PetscFunctionBegin; 1580 if (aij->size == 1) { 1581 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1582 } else { 1583 if (type == NORM_FROBENIUS) { 1584 v = amat->a; 1585 for (i=0; i<amat->nz; i++) { 1586 #if defined(PETSC_USE_COMPLEX) 1587 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1588 #else 1589 sum += (*v)*(*v); v++; 1590 #endif 1591 } 1592 v = bmat->a; 1593 for (i=0; i<bmat->nz; i++) { 1594 #if defined(PETSC_USE_COMPLEX) 1595 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1596 #else 1597 sum += (*v)*(*v); v++; 1598 #endif 1599 } 1600 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 *norm = sqrt(*norm); 1602 } else if (type 
== NORM_1) { /* max column norm */ 1603 PetscReal *tmp,*tmp2; 1604 PetscInt *jj,*garray = aij->garray; 1605 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1606 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1607 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1608 *norm = 0.0; 1609 v = amat->a; jj = amat->j; 1610 for (j=0; j<amat->nz; j++) { 1611 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1612 } 1613 v = bmat->a; jj = bmat->j; 1614 for (j=0; j<bmat->nz; j++) { 1615 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1616 } 1617 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1618 for (j=0; j<mat->cmap->N; j++) { 1619 if (tmp2[j] > *norm) *norm = tmp2[j]; 1620 } 1621 ierr = PetscFree(tmp);CHKERRQ(ierr); 1622 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1623 } else if (type == NORM_INFINITY) { /* max row norm */ 1624 PetscReal ntemp = 0.0; 1625 for (j=0; j<aij->A->rmap->n; j++) { 1626 v = amat->a + amat->i[j]; 1627 sum = 0.0; 1628 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1629 sum += PetscAbsScalar(*v); v++; 1630 } 1631 v = bmat->a + bmat->i[j]; 1632 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1633 sum += PetscAbsScalar(*v); v++; 1634 } 1635 if (sum > ntemp) ntemp = sum; 1636 } 1637 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1638 } else { 1639 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1640 } 1641 } 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatTranspose_MPIAIJ" 1647 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1648 { 1649 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1650 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1651 PetscErrorCode ierr; 1652 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1653 PetscInt cstart=A->cmap->rstart,ncol; 1654 Mat B; 1655 MatScalar *array; 1656 1657 PetscFunctionBegin; 1658 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1659 1660 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1661 ai = Aloc->i; aj = Aloc->j; 1662 bi = Bloc->i; bj = Bloc->j; 1663 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1664 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1665 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1666 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1667 for (i=0; i<ai[ma]; i++){ 1668 d_nnz[aj[i]] ++; 1669 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1670 } 1671 1672 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1673 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1674 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1675 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1676 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1677 } else { 1678 B = *matout; 1679 } 1680 1681 /* copy over the A part */ 1682 array = Aloc->a; 1683 row = A->rmap->rstart; 1684 for (i=0; i<ma; i++) { 1685 ncol = ai[i+1]-ai[i]; 1686 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1687 row++; array += ncol; aj += ncol; 1688 } 1689 aj = Aloc->j; 1690 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1691 1692 /* copy over the B part */ 1693 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1694 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1695 array = Bloc->a; 1696 row = A->rmap->rstart; 1697 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1698 cols_tmp = cols; 1699 for (i=0; i<mb; i++) { 1700 ncol = bi[i+1]-bi[i]; 1701 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1702 row++; array += ncol; cols_tmp += ncol; 1703 } 1704 ierr = PetscFree(cols);CHKERRQ(ierr); 1705 1706 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1707 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1708 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1709 *matout = B; 1710 } else { 1711 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1712 } 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1718 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1719 { 1720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1721 Mat a = aij->A,b = aij->B; 1722 PetscErrorCode ierr; 1723 PetscInt s1,s2,s3; 1724 1725 PetscFunctionBegin; 1726 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1727 if (rr) { 1728 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1729 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1730 /* Overlap communication with computation. */ 1731 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1732 } 1733 if (ll) { 1734 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1735 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1736 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1737 } 1738 /* scale the diagonal block */ 1739 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1740 1741 if (rr) { 1742 /* Do a scatter end and then right scale the off-diagonal block */ 1743 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1744 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1745 } 1746 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1752 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1753 { 1754 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1759 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1760 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1761 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1766 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1767 { 1768 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatEqual_MPIAIJ" 1778 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1779 { 1780 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1781 Mat a,b,c,d; 1782 PetscTruth flg; 1783 PetscErrorCode ierr; 1784 1785 PetscFunctionBegin; 1786 a = matA->A; b = matA->B; 1787 c = matB->A; d = matB->B; 1788 1789 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1790 if (flg) { 1791 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1792 } 1793 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1794 
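  /*
     Each process has compared only its local diagonal (A) and off-diagonal (B) blocks;
     the MPI_Allreduce with MPI_LAND above makes the result collective so every rank
     returns the same answer.  A minimal sketch of the same reduce-a-local-flag pattern
     (local_test() is a placeholder, not a routine in this file):

        PetscTruth lflg = local_test(),gflg;
        ierr = MPI_Allreduce(&lflg,&gflg,1,MPI_INT,MPI_LAND,comm);CHKERRQ(ierr);
  */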
PetscFunctionReturn(0); 1795 } 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatCopy_MPIAIJ" 1799 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1800 { 1801 PetscErrorCode ierr; 1802 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1803 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1804 1805 PetscFunctionBegin; 1806 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1807 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1808 /* because of the column compression in the off-processor part of the matrix a->B, 1809 the number of columns in a->B and b->B may be different, hence we cannot call 1810 the MatCopy() directly on the two parts. If need be, we can provide a more 1811 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1812 then copying the submatrices */ 1813 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1814 } else { 1815 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1816 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1817 } 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1823 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1824 { 1825 PetscErrorCode ierr; 1826 1827 PetscFunctionBegin; 1828 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1829 PetscFunctionReturn(0); 1830 } 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatAXPY_MPIAIJ" 1834 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1835 { 1836 PetscErrorCode ierr; 1837 PetscInt i; 1838 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1839 PetscBLASInt bnz,one=1; 1840 Mat_SeqAIJ *x,*y; 1841 1842 PetscFunctionBegin; 1843 if (str == SAME_NONZERO_PATTERN) { 1844 PetscScalar alpha = a; 1845 x = (Mat_SeqAIJ *)xx->A->data; 1846 y = (Mat_SeqAIJ *)yy->A->data; 1847 bnz = PetscBLASIntCast(x->nz); 1848 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 bnz = PetscBLASIntCast(x->nz); 1852 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1853 } else if (str == SUBSET_NONZERO_PATTERN) { 1854 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1855 1856 x = (Mat_SeqAIJ *)xx->B->data; 1857 y = (Mat_SeqAIJ *)yy->B->data; 1858 if (y->xtoy && y->XtoY != xx->B) { 1859 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1860 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1861 } 1862 if (!y->xtoy) { /* get xtoy */ 1863 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1864 y->XtoY = xx->B; 1865 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1866 } 1867 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1868 } else { 1869 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatConjugate_MPIAIJ" 1878 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1879 { 1880 #if defined(PETSC_USE_COMPLEX) 1881 PetscErrorCode ierr; 1882 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1883 1884 PetscFunctionBegin; 1885 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1886 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1887 #else 1888 PetscFunctionBegin; 1889 #endif 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatRealPart_MPIAIJ" 1895 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
    This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* stash the ILU level graph on the factored matrix so MatSolve_MPIAIJ() can find it */
  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
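/*
   MatSolve_MPIAIJ() below retrieves the Boost ILU level graph that MatILUFactorSymbolic_MPIAIJ()
   attached to the factored matrix with a PetscContainer under the name "graph".  A minimal
   sketch of that compose/query idiom, with `ctx' standing in for any user pointer (illustrative
   only, not code taken from this file):

      PetscContainer c;
      void           *ctx;
      ierr = PetscContainerCreate(comm,&c);CHKERRQ(ierr);
      ierr = PetscContainerSetPointer(c,ctx);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)mat,"graph",(PetscObject)c);CHKERRQ(ierr);
      ...
      ierr = PetscObjectQuery((PetscObject)mat,"graph",(PetscObject*)&c);CHKERRQ(ierr);
      ierr = PetscContainerGetPointer(c,(void**)&ctx);CHKERRQ(ierr);
*/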
#define __FUNCT__ "MatSolve_MPIAIJ" 1989 /* 1990 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1991 */ 1992 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1993 { 1994 namespace graph_dist = boost::graph::distributed; 1995 1996 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1997 lgraph_type* lgraph_p; 1998 PetscContainer c; 1999 PetscErrorCode ierr; 2000 2001 PetscFunctionBegin; 2002 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2003 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2004 ierr = VecCopy(b, x);CHKERRQ(ierr); 2005 2006 PetscScalar* array_x; 2007 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2008 PetscInt sx; 2009 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2010 2011 PetscScalar* array_b; 2012 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2013 PetscInt sb; 2014 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2015 2016 lgraph_type& level_graph = *lgraph_p; 2017 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2018 2019 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2020 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2021 ref_x(array_x, boost::extents[num_vertices(graph)]); 2022 2023 typedef boost::iterator_property_map<array_ref_type::iterator, 2024 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2025 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2026 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2027 2028 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2029 2030 PetscFunctionReturn(0); 2031 } 2032 #endif 2033 2034 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2035 PetscInt nzlocal,nsends,nrecvs; 2036 PetscMPIInt *send_rank,*recv_rank; 2037 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2038 PetscScalar *sbuf_a,**rbuf_a; 2039 PetscErrorCode (*MatDestroy)(Mat); 2040 } Mat_Redundant; 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2044 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2045 { 2046 PetscErrorCode ierr; 2047 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2048 PetscInt i; 2049 2050 PetscFunctionBegin; 2051 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2052 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2053 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2054 for (i=0; i<redund->nrecvs; i++){ 2055 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2056 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2057 } 2058 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2059 ierr = PetscFree(redund);CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 #undef __FUNCT__ 2064 #define __FUNCT__ "MatDestroy_MatRedundant" 2065 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2066 { 2067 PetscErrorCode ierr; 2068 PetscContainer container; 2069 Mat_Redundant *redund=PETSC_NULL; 2070 2071 PetscFunctionBegin; 2072 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2073 if (container) { 2074 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2075 } else { 2076 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2077 } 2078 A->ops->destroy = redund->MatDestroy; 2079 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2080 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2081 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2131 2132 nsends = redund->nsends; 2133 nrecvs = redund->nrecvs; 2134 send_rank = redund->send_rank; 2135 recv_rank = redund->recv_rank; 2136 sbuf_nz = redund->sbuf_nz; 2137 rbuf_nz = redund->rbuf_nz; 2138 sbuf_j = redund->sbuf_j; 2139 sbuf_a = redund->sbuf_a; 2140 rbuf_j = redund->rbuf_j; 2141 rbuf_a = redund->rbuf_a; 2142 } 2143 2144 if (reuse == MAT_INITIAL_MATRIX){ 2145 PetscMPIInt subrank,subsize; 2146 PetscInt nleftover,np_subcomm; 2147 /* get the destination processors' id send_rank, nsends and nrecvs */ 2148 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2149 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2150 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2151 np_subcomm = size/nsubcomm; 2152 nleftover = size - nsubcomm*np_subcomm; 2153 nsends = 0; nrecvs = 0; 2154 for (i=0; i<size; i++){ /* i=rank*/ 2155 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2156 send_rank[nsends] = i; nsends++; 2157 recv_rank[nrecvs++] = i; 2158 } 2159 } 2160 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2161 i = size-nleftover-1; 2162 j = 0; 2163 while (j < nsubcomm - nleftover){ 2164 send_rank[nsends++] = i; 2165 i--; j++; 2166 } 2167 } 2168 2169 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2170 for (i=0; i<nleftover; i++){ 2171 recv_rank[nrecvs++] = size-nleftover+i; 2172 } 2173 } 2174 2175 /* allocate sbuf_j, sbuf_a */ 2176 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2177 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2178 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2179 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2180 2181 /* copy mat's local entries into the buffers */ 2182 if (reuse == MAT_INITIAL_MATRIX){ 2183 rownz_max = 0; 2184 rptr = sbuf_j; 2185 cols = sbuf_j + rend-rstart + 1; 2186 vals = sbuf_a; 2187 rptr[0] = 0; 2188 for (i=0; i<rend-rstart; i++){ 2189 row = i + rstart; 2190 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2191 ncols = nzA + nzB; 2192 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2193 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2194 /* load the column indices for this row into cols */ 2195 lwrite = 0; 2196 for (l=0; l<nzB; l++) { 2197 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2198 vals[lwrite] = aworkB[l]; 2199 cols[lwrite++] = ctmp; 2200 } 2201 } 2202 for (l=0; l<nzA; l++){ 2203 vals[lwrite] = aworkA[l]; 2204 cols[lwrite++] = cstart + cworkA[l]; 2205 } 2206 for (l=0; l<nzB; l++) { 2207 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2208 vals[lwrite] = aworkB[l]; 2209 cols[lwrite++] = ctmp; 2210 } 2211 } 2212 vals += ncols; 2213 cols += ncols; 2214 rptr[i+1] = rptr[i] + ncols; 2215 if (rownz_max < ncols) rownz_max = ncols; 2216 } 2217 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2218 } else { /* only copy matrix values into sbuf_a */ 2219 rptr = sbuf_j; 2220 vals = sbuf_a; 2221 rptr[0] = 0; 2222 for (i=0; i<rend-rstart; i++){ 2223 row = i + rstart; 2224 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2225 ncols = nzA + nzB; 2226 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2227 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2228 lwrite = 0; 2229 for (l=0; l<nzB; l++) { 2230 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2231 } 2232 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2233 for 
(l=0; l<nzB; l++) { 2234 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2235 } 2236 vals += ncols; 2237 rptr[i+1] = rptr[i] + ncols; 2238 } 2239 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2240 2241 /* send nzlocal to others, and recv other's nzlocal */ 2242 /*--------------------------------------------------*/ 2243 if (reuse == MAT_INITIAL_MATRIX){ 2244 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 s_waits2 = s_waits3 + nsends; 2246 s_waits1 = s_waits2 + nsends; 2247 r_waits1 = s_waits1 + nsends; 2248 r_waits2 = r_waits1 + nrecvs; 2249 r_waits3 = r_waits2 + nrecvs; 2250 } else { 2251 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2252 r_waits3 = s_waits3 + nsends; 2253 } 2254 2255 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2256 if (reuse == MAT_INITIAL_MATRIX){ 2257 /* get new tags to keep the communication clean */ 2258 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2259 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2260 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2261 2262 /* post receives of other's nzlocal */ 2263 for (i=0; i<nrecvs; i++){ 2264 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2265 } 2266 /* send nzlocal to others */ 2267 for (i=0; i<nsends; i++){ 2268 sbuf_nz[i] = nzlocal; 2269 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2270 } 2271 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2272 count = nrecvs; 2273 while (count) { 2274 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2275 recv_rank[imdex] = recv_status.MPI_SOURCE; 2276 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2277 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2278 2279 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2280 rbuf_nz[imdex] += i + 2; 2281 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2282 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2283 count--; 2284 } 2285 /* wait on sends of nzlocal */ 2286 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2287 /* send mat->i,j to others, and recv from other's */ 2288 /*------------------------------------------------*/ 2289 for (i=0; i<nsends; i++){ 2290 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2291 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2292 } 2293 /* wait on receives of mat->i,j */ 2294 /*------------------------------*/ 2295 count = nrecvs; 2296 while (count) { 2297 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2298 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2299 count--; 2300 } 2301 /* wait on sends of mat->i,j */ 2302 /*---------------------------*/ 2303 if (nsends) { 2304 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2305 } 2306 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2307 2308 /* post receives, send and receive mat->a */ 2309 
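  /* As with the row lengths and column indices above, the numerical values are exchanged
     with nonblocking MPI_Isend/MPI_Irecv pairs and completed with MPI_Waitany/MPI_Waitall. */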
/*----------------------------------------*/ 2310 for (imdex=0; imdex<nrecvs; imdex++) { 2311 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2312 } 2313 for (i=0; i<nsends; i++){ 2314 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2315 } 2316 count = nrecvs; 2317 while (count) { 2318 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2319 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2320 count--; 2321 } 2322 if (nsends) { 2323 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2324 } 2325 2326 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2327 2328 /* create redundant matrix */ 2329 /*-------------------------*/ 2330 if (reuse == MAT_INITIAL_MATRIX){ 2331 /* compute rownz_max for preallocation */ 2332 for (imdex=0; imdex<nrecvs; imdex++){ 2333 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2334 rptr = rbuf_j[imdex]; 2335 for (i=0; i<j; i++){ 2336 ncols = rptr[i+1] - rptr[i]; 2337 if (rownz_max < ncols) rownz_max = ncols; 2338 } 2339 } 2340 2341 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2342 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2343 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2344 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2345 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2346 } else { 2347 C = *matredundant; 2348 } 2349 2350 /* insert local matrix entries */ 2351 rptr = sbuf_j; 2352 cols = sbuf_j + rend-rstart + 1; 2353 vals = sbuf_a; 2354 for (i=0; i<rend-rstart; i++){ 2355 row = i + rstart; 2356 ncols = rptr[i+1] - rptr[i]; 2357 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2358 vals += ncols; 2359 cols += ncols; 2360 } 2361 /* insert received matrix entries */ 2362 for (imdex=0; imdex<nrecvs; imdex++){ 2363 rstart = rowrange[recv_rank[imdex]]; 2364 rend = rowrange[recv_rank[imdex]+1]; 2365 rptr = rbuf_j[imdex]; 2366 cols = rbuf_j[imdex] + rend-rstart + 1; 2367 vals = rbuf_a[imdex]; 2368 for (i=0; i<rend-rstart; i++){ 2369 row = i + rstart; 2370 ncols = rptr[i+1] - rptr[i]; 2371 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2372 vals += ncols; 2373 cols += ncols; 2374 } 2375 } 2376 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2377 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2378 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2379 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2380 if (reuse == MAT_INITIAL_MATRIX){ 2381 PetscContainer container; 2382 *matredundant = C; 2383 /* create a supporting struct and attach it to C for reuse */ 2384 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2385 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2386 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2387 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2388 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2389 2390 redund->nzlocal = nzlocal; 2391 redund->nsends = nsends; 2392 redund->nrecvs = nrecvs; 2393 redund->send_rank = send_rank; 2394 redund->recv_rank = recv_rank; 
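  /*
     The communication pattern and index buffers built for MAT_INITIAL_MATRIX are cached in
     this container so that a later call with MAT_REUSE_MATRIX only has to ship the numerical
     values.  A hedged usage sketch (nsubcomm, subcomm and mlocal are whatever the caller set
     up; they are not defined here):

        Mat C;
        ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
        ... change the entries of A, keeping the same nonzero pattern ...
        ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  */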
2395 redund->sbuf_nz = sbuf_nz; 2396 redund->rbuf_nz = rbuf_nz; 2397 redund->sbuf_j = sbuf_j; 2398 redund->sbuf_a = sbuf_a; 2399 redund->rbuf_j = rbuf_j; 2400 redund->rbuf_a = rbuf_a; 2401 2402 redund->MatDestroy = C->ops->destroy; 2403 C->ops->destroy = MatDestroy_MatRedundant; 2404 } 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2410 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2411 { 2412 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2413 PetscErrorCode ierr; 2414 PetscInt i,*idxb = 0; 2415 PetscScalar *va,*vb; 2416 Vec vtmp; 2417 2418 PetscFunctionBegin; 2419 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2420 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2421 if (idx) { 2422 for (i=0; i<A->rmap->n; i++) { 2423 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2424 } 2425 } 2426 2427 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2428 if (idx) { 2429 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2430 } 2431 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2432 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2433 2434 for (i=0; i<A->rmap->n; i++){ 2435 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2436 va[i] = vb[i]; 2437 if (idx) idx[i] = a->garray[idxb[i]]; 2438 } 2439 } 2440 2441 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2443 if (idxb) { 2444 ierr = PetscFree(idxb);CHKERRQ(ierr); 2445 } 2446 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2452 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2453 { 2454 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2455 PetscErrorCode ierr; 2456 PetscInt i,*idxb = 0; 2457 PetscScalar *va,*vb; 2458 Vec vtmp; 2459 2460 PetscFunctionBegin; 2461 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2462 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2463 if (idx) { 2464 for (i=0; i<A->cmap->n; i++) { 2465 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2466 } 2467 } 2468 2469 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2470 if (idx) { 2471 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2472 } 2473 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2474 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2475 2476 for (i=0; i<A->rmap->n; i++){ 2477 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2478 va[i] = vb[i]; 2479 if (idx) idx[i] = a->garray[idxb[i]]; 2480 } 2481 } 2482 2483 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2484 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2485 if (idxb) { 2486 ierr = PetscFree(idxb);CHKERRQ(ierr); 2487 } 2488 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2494 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2495 { 2496 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2497 PetscInt n = A->rmap->n; 2498 PetscInt cstart = A->cmap->rstart; 2499 PetscInt *cmap = mat->garray; 2500 PetscInt *diagIdx, *offdiagIdx; 2501 Vec diagV, offdiagV; 2502 PetscScalar *a, *diagA, *offdiagA; 2503 PetscInt r; 2504 PetscErrorCode ierr; 2505 2506 PetscFunctionBegin; 2507 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2508 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2509 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2510 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2511 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2512 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2513 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2514 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2515 for(r = 0; r < n; ++r) { 2516 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 idx[r] = cstart + diagIdx[r]; 2519 } else { 2520 a[r] = offdiagA[r]; 2521 idx[r] = cmap[offdiagIdx[r]]; 2522 } 2523 } 2524 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2525 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2526 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2527 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2528 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2529 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 #undef __FUNCT__ 2534 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2535 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2536 { 2537 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2538 PetscInt n = A->rmap->n; 2539 PetscInt cstart = A->cmap->rstart; 2540 PetscInt *cmap = mat->garray; 2541 PetscInt *diagIdx, *offdiagIdx; 2542 Vec diagV, offdiagV; 2543 PetscScalar *a, *diagA, *offdiagA; 2544 PetscInt r; 2545 PetscErrorCode ierr; 2546 2547 PetscFunctionBegin; 2548 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2549 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2550 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2551 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2552 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2553 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2554 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2555 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2556 for(r = 0; r < n; ++r) { 2557 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2558 a[r] = diagA[r]; 2559 idx[r] = cstart + diagIdx[r]; 2560 } else { 2561 a[r] = offdiagA[r]; 2562 idx[r] = cmap[offdiagIdx[r]]; 2563 } 2564 } 2565 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2566 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2567 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2568 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2569 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2570 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 #undef __FUNCT__ 2575 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2576 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2577 { 2578 PetscErrorCode ierr; 2579 Mat *dummy; 2580 2581 PetscFunctionBegin; 2582 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2583 *newmat = *dummy; 2584 ierr = PetscFree(dummy);CHKERRQ(ierr); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2589 /* -------------------------------------------------------------------*/ 2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2591 MatGetRow_MPIAIJ, 2592 MatRestoreRow_MPIAIJ, 2593 MatMult_MPIAIJ, 2594 /* 4*/ MatMultAdd_MPIAIJ, 2595 MatMultTranspose_MPIAIJ, 2596 MatMultTransposeAdd_MPIAIJ, 2597 #ifdef PETSC_HAVE_PBGL 2598 MatSolve_MPIAIJ, 2599 #else 2600 0, 2601 #endif 2602 0, 2603 0, 2604 /*10*/ 0, 2605 0, 2606 0, 2607 
MatSOR_MPIAIJ, 2608 MatTranspose_MPIAIJ, 2609 /*15*/ MatGetInfo_MPIAIJ, 2610 MatEqual_MPIAIJ, 2611 MatGetDiagonal_MPIAIJ, 2612 MatDiagonalScale_MPIAIJ, 2613 MatNorm_MPIAIJ, 2614 /*20*/ MatAssemblyBegin_MPIAIJ, 2615 MatAssemblyEnd_MPIAIJ, 2616 MatSetOption_MPIAIJ, 2617 MatZeroEntries_MPIAIJ, 2618 /*24*/ MatZeroRows_MPIAIJ, 2619 0, 2620 #ifdef PETSC_HAVE_PBGL 2621 0, 2622 #else 2623 0, 2624 #endif 2625 0, 2626 0, 2627 /*29*/ MatSetUpPreallocation_MPIAIJ, 2628 #ifdef PETSC_HAVE_PBGL 2629 0, 2630 #else 2631 0, 2632 #endif 2633 0, 2634 0, 2635 0, 2636 /*34*/ MatDuplicate_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*39*/ MatAXPY_MPIAIJ, 2642 MatGetSubMatrices_MPIAIJ, 2643 MatIncreaseOverlap_MPIAIJ, 2644 MatGetValues_MPIAIJ, 2645 MatCopy_MPIAIJ, 2646 /*44*/ MatGetRowMax_MPIAIJ, 2647 MatScale_MPIAIJ, 2648 0, 2649 0, 2650 0, 2651 /*49*/ MatSetBlockSize_MPIAIJ, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /*54*/ MatFDColoringCreate_MPIAIJ, 2657 0, 2658 MatSetUnfactored_MPIAIJ, 2659 MatPermute_MPIAIJ, 2660 0, 2661 /*59*/ MatGetSubMatrix_MPIAIJ, 2662 MatDestroy_MPIAIJ, 2663 MatView_MPIAIJ, 2664 0, 2665 0, 2666 /*64*/ 0, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2672 MatGetRowMinAbs_MPIAIJ, 2673 0, 2674 MatSetColoring_MPIAIJ, 2675 #if defined(PETSC_HAVE_ADIC) 2676 MatSetValuesAdic_MPIAIJ, 2677 #else 2678 0, 2679 #endif 2680 MatSetValuesAdifor_MPIAIJ, 2681 /*75*/ MatFDColoringApply_AIJ, 2682 0, 2683 0, 2684 0, 2685 0, 2686 /*80*/ 0, 2687 0, 2688 0, 2689 /*83*/ MatLoad_MPIAIJ, 2690 0, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2696 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2697 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2698 MatPtAP_Basic, 2699 MatPtAPSymbolic_MPIAIJ, 2700 /*94*/ MatPtAPNumeric_MPIAIJ, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /*99*/ 0, 2706 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2707 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2708 MatConjugate_MPIAIJ, 2709 0, 2710 /*104*/MatSetValuesRow_MPIAIJ, 2711 MatRealPart_MPIAIJ, 2712 MatImaginaryPart_MPIAIJ, 2713 0, 2714 0, 2715 /*109*/0, 2716 MatGetRedundantMatrix_MPIAIJ, 2717 MatGetRowMin_MPIAIJ, 2718 0, 2719 0, 2720 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /*119*/0, 2726 0, 2727 0, 2728 0, 2729 MatGetMultiProcBlock_MPIAIJ 2730 }; 2731 2732 /* ----------------------------------------------------------------------------------------*/ 2733 2734 EXTERN_C_BEGIN 2735 #undef __FUNCT__ 2736 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2737 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2738 { 2739 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2744 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 EXTERN_C_END 2748 2749 EXTERN_C_BEGIN 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2752 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2753 { 2754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2759 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762 EXTERN_C_END 2763 2764 EXTERN_C_BEGIN 2765 #undef __FUNCT__ 2766 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2767 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2768 { 2769 Mat_MPIAIJ *b; 2770 PetscErrorCode ierr; 2771 PetscInt 
i; 2772 2773 PetscFunctionBegin; 2774 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2775 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2776 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2777 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2778 2779 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2780 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2781 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2782 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2783 if (d_nnz) { 2784 for (i=0; i<B->rmap->n; i++) { 2785 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2786 } 2787 } 2788 if (o_nnz) { 2789 for (i=0; i<B->rmap->n; i++) { 2790 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2791 } 2792 } 2793 b = (Mat_MPIAIJ*)B->data; 2794 2795 if (!B->preallocated) { 2796 /* Explicitly create 2 MATSEQAIJ matrices. */ 2797 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2798 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2799 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2800 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2801 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2802 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2803 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2804 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2805 } 2806 2807 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2808 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2809 B->preallocated = PETSC_TRUE; 2810 PetscFunctionReturn(0); 2811 } 2812 EXTERN_C_END 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2816 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2817 { 2818 Mat mat; 2819 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2820 PetscErrorCode ierr; 2821 2822 PetscFunctionBegin; 2823 *newmat = 0; 2824 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2825 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2826 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2827 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2828 a = (Mat_MPIAIJ*)mat->data; 2829 2830 mat->factortype = matin->factortype; 2831 mat->rmap->bs = matin->rmap->bs; 2832 mat->assembled = PETSC_TRUE; 2833 mat->insertmode = NOT_SET_VALUES; 2834 mat->preallocated = PETSC_TRUE; 2835 2836 a->size = oldmat->size; 2837 a->rank = oldmat->rank; 2838 a->donotstash = oldmat->donotstash; 2839 a->roworiented = oldmat->roworiented; 2840 a->rowindices = 0; 2841 a->rowvalues = 0; 2842 a->getrowactive = PETSC_FALSE; 2843 2844 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2845 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2846 2847 if (oldmat->colmap) { 2848 #if defined (PETSC_USE_CTABLE) 2849 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2850 #else 2851 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2852 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2853 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
    Allows sending/receiving messages larger than 2 gigabytes in a single call by breaking
    them into chunks of at most CHUNKSIZE entries
*/
static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    /* all chunks are CHUNKSIZE entries except the final, partial one */
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}
static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  MPI_Status      status;
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? 
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2910 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2911 if (type == MPIU_INT) { 2912 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2913 } else if (type == MPIU_SCALAR) { 2914 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2915 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2916 } 2917 return 0; 2918 } 2919 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "MatLoad_MPIAIJ" 2922 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2923 { 2924 PetscScalar *vals,*svals; 2925 MPI_Comm comm = ((PetscObject)viewer)->comm; 2926 PetscErrorCode ierr; 2927 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2928 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2929 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2930 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2931 PetscInt cend,cstart,n,*rowners,sizesset=1; 2932 int fd; 2933 2934 PetscFunctionBegin; 2935 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2936 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2937 if (!rank) { 2938 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2939 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2940 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2941 } 2942 2943 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2944 2945 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2946 M = header[1]; N = header[2]; 2947 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2948 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2949 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2950 2951 /* If global sizes are set, check if they are consistent with that given in the file */ 2952 if (sizesset) { 2953 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2954 } 2955 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2956 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2957 2958 /* determine ownership of all rows */ 2959 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2960 else m = newMat->rmap->n; /* Set by user */ 2961 2962 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2963 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2964 2965 /* First process needs enough room for process with most rows */ 2966 if (!rank) { 2967 mmax = rowners[1]; 2968 for (i=2; i<size; i++) { 2969 mmax = PetscMax(mmax,rowners[i]); 2970 } 2971 } else mmax = m; 2972 2973 rowners[0] = 0; 2974 for (i=2; i<=size; i++) { 2975 rowners[i] += rowners[i-1]; 2976 } 2977 rstart = rowners[rank]; 2978 rend = rowners[rank+1]; 2979 2980 /* distribute row lengths to all processors */ 2981 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2982 if (!rank) { 2983 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2984 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2985 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2986 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2987 for (j=0; 
j<m; j++) { 2988 procsnz[0] += ourlens[j]; 2989 } 2990 for (i=1; i<size; i++) { 2991 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2992 /* calculate the number of nonzeros on each processor */ 2993 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2994 procsnz[i] += rowlengths[j]; 2995 } 2996 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2997 } 2998 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2999 } else { 3000 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3001 } 3002 3003 if (!rank) { 3004 /* determine max buffer needed and allocate it */ 3005 maxnz = 0; 3006 for (i=0; i<size; i++) { 3007 maxnz = PetscMax(maxnz,procsnz[i]); 3008 } 3009 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3010 3011 /* read in my part of the matrix column indices */ 3012 nz = procsnz[0]; 3013 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3014 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3015 3016 /* read in every one elses and ship off */ 3017 for (i=1; i<size; i++) { 3018 nz = procsnz[i]; 3019 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3020 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3021 } 3022 ierr = PetscFree(cols);CHKERRQ(ierr); 3023 } else { 3024 /* determine buffer space needed for message */ 3025 nz = 0; 3026 for (i=0; i<m; i++) { 3027 nz += ourlens[i]; 3028 } 3029 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3030 3031 /* receive message of column indices*/ 3032 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3033 } 3034 3035 /* determine column ownership if matrix is not square */ 3036 if (N != M) { 3037 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3038 else n = newMat->cmap->n; 3039 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3040 cstart = cend - n; 3041 } else { 3042 cstart = rstart; 3043 cend = rend; 3044 n = cend - cstart; 3045 } 3046 3047 /* loop over local rows, determining number of off diagonal entries */ 3048 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3049 jj = 0; 3050 for (i=0; i<m; i++) { 3051 for (j=0; j<ourlens[i]; j++) { 3052 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3053 jj++; 3054 } 3055 } 3056 3057 for (i=0; i<m; i++) { 3058 ourlens[i] -= offlens[i]; 3059 } 3060 if (!sizesset) { 3061 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3062 } 3063 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3064 3065 for (i=0; i<m; i++) { 3066 ourlens[i] += offlens[i]; 3067 } 3068 3069 if (!rank) { 3070 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3071 3072 /* read in my part of the matrix numerical values */ 3073 nz = procsnz[0]; 3074 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3075 3076 /* insert into matrix */ 3077 jj = rstart; 3078 smycols = mycols; 3079 svals = vals; 3080 for (i=0; i<m; i++) { 3081 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3082 smycols += ourlens[i]; 3083 svals += ourlens[i]; 3084 jj++; 3085 } 3086 3087 /* read in other processors and ship out */ 3088 for (i=1; i<size; i++) { 3089 nz = procsnz[i]; 3090 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3091 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3092 } 3093 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3094 } else { 3095 /* receive numeric values */ 3096 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3097 3098 /* receive message of values*/ 3099 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3100 3101 /* insert into matrix */ 3102 jj = rstart; 3103 smycols = mycols; 3104 svals = vals; 3105 for (i=0; i<m; i++) { 3106 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3107 smycols += ourlens[i]; 3108 svals += ourlens[i]; 3109 jj++; 3110 } 3111 } 3112 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3113 ierr = PetscFree(vals);CHKERRQ(ierr); 3114 ierr = PetscFree(mycols);CHKERRQ(ierr); 3115 ierr = PetscFree(rowners);CHKERRQ(ierr); 3116 3117 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3119 PetscFunctionReturn(0); 3120 } 3121 3122 #undef __FUNCT__ 3123 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3124 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3125 { 3126 PetscErrorCode ierr; 3127 IS iscol_local; 3128 PetscInt csize; 3129 3130 PetscFunctionBegin; 3131 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3132 if (call == MAT_REUSE_MATRIX) { 3133 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3134 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3135 } else { 3136 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3137 } 3138 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3139 if (call == MAT_INITIAL_MATRIX) { 3140 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3141 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3142 } 3143 PetscFunctionReturn(0); 3144 } 3145 3146 #undef __FUNCT__ 3147 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3148 /* 3149 Not great since it makes two copies of the submatrix, first an SeqAIJ 3150 in local and then by concatenating the local matrices the end result. 3151 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3152 3153 Note: This requires a sequential iscol with all indices. 
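   For MAT_INITIAL_MATRIX the expected calling pattern is the one used by MatGetSubMatrix_MPIAIJ()
   above, i.e. (a sketch):

      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
      ierr = ISDestroy(iscol_local);CHKERRQ(ierr);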
3154 */ 3155 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3156 { 3157 PetscErrorCode ierr; 3158 PetscMPIInt rank,size; 3159 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3160 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3161 Mat *local,M,Mreuse; 3162 MatScalar *vwork,*aa; 3163 MPI_Comm comm = ((PetscObject)mat)->comm; 3164 Mat_SeqAIJ *aij; 3165 3166 3167 PetscFunctionBegin; 3168 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3170 3171 if (call == MAT_REUSE_MATRIX) { 3172 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3173 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3174 local = &Mreuse; 3175 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3176 } else { 3177 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3178 Mreuse = *local; 3179 ierr = PetscFree(local);CHKERRQ(ierr); 3180 } 3181 3182 /* 3183 m - number of local rows 3184 n - number of columns (same on all processors) 3185 rstart - first row in new global matrix generated 3186 */ 3187 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3188 if (call == MAT_INITIAL_MATRIX) { 3189 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3190 ii = aij->i; 3191 jj = aij->j; 3192 3193 /* 3194 Determine the number of non-zeros in the diagonal and off-diagonal 3195 portions of the matrix in order to do correct preallocation 3196 */ 3197 3198 /* first get start and end of "diagonal" columns */ 3199 if (csize == PETSC_DECIDE) { 3200 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3201 if (mglobal == n) { /* square matrix */ 3202 nlocal = m; 3203 } else { 3204 nlocal = n/size + ((n % size) > rank); 3205 } 3206 } else { 3207 nlocal = csize; 3208 } 3209 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3210 rstart = rend - nlocal; 3211 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3212 3213 /* next, compute all the lengths */ 3214 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3215 olens = dlens + m; 3216 for (i=0; i<m; i++) { 3217 jend = ii[i+1] - ii[i]; 3218 olen = 0; 3219 dlen = 0; 3220 for (j=0; j<jend; j++) { 3221 if (*jj < rstart || *jj >= rend) olen++; 3222 else dlen++; 3223 jj++; 3224 } 3225 olens[i] = olen; 3226 dlens[i] = dlen; 3227 } 3228 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3229 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3230 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3231 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3232 ierr = PetscFree(dlens);CHKERRQ(ierr); 3233 } else { 3234 PetscInt ml,nl; 3235 3236 M = *newmat; 3237 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3238 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3239 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3240 /* 3241 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3242 rather than the slower MatSetValues(). 
3243 */ 3244 M->was_assembled = PETSC_TRUE; 3245 M->assembled = PETSC_FALSE; 3246 } 3247 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3248 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3249 ii = aij->i; 3250 jj = aij->j; 3251 aa = aij->a; 3252 for (i=0; i<m; i++) { 3253 row = rstart + i; 3254 nz = ii[i+1] - ii[i]; 3255 cwork = jj; jj += nz; 3256 vwork = aa; aa += nz; 3257 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3258 } 3259 3260 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3261 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3262 *newmat = M; 3263 3264 /* save submatrix used in processor for next request */ 3265 if (call == MAT_INITIAL_MATRIX) { 3266 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3267 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3268 } 3269 3270 PetscFunctionReturn(0); 3271 } 3272 3273 EXTERN_C_BEGIN 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3276 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3277 { 3278 PetscInt m,cstart,cend,j,nnz,i,d; 3279 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3280 const PetscInt *JJ; 3281 PetscScalar *values; 3282 PetscErrorCode ierr; 3283 3284 PetscFunctionBegin; 3285 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3286 3287 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3288 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3289 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3290 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3291 m = B->rmap->n; 3292 cstart = B->cmap->rstart; 3293 cend = B->cmap->rend; 3294 rstart = B->rmap->rstart; 3295 3296 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3297 3298 #if defined(PETSC_USE_DEBUG) 3299 for (i=0; i<m; i++) { 3300 nnz = Ii[i+1]- Ii[i]; 3301 JJ = J + Ii[i]; 3302 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3303 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3304 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3305 } 3306 #endif 3307 3308 for (i=0; i<m; i++) { 3309 nnz = Ii[i+1]- Ii[i]; 3310 JJ = J + Ii[i]; 3311 nnz_max = PetscMax(nnz_max,nnz); 3312 d = 0; 3313 for (j=0; j<nnz; j++) { 3314 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3315 } 3316 d_nnz[i] = d; 3317 o_nnz[i] = nnz - d; 3318 } 3319 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3320 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3321 3322 if (v) values = (PetscScalar*)v; 3323 else { 3324 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3325 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3326 } 3327 3328 for (i=0; i<m; i++) { 3329 ii = i + rstart; 3330 nnz = Ii[i+1]- Ii[i]; 3331 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3332 } 3333 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3334 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3335 3336 if (!v) { 3337 ierr = PetscFree(values);CHKERRQ(ierr); 3338 } 3339 PetscFunctionReturn(0); 3340 } 3341 EXTERN_C_END 3342 3343 #undef __FUNCT__ 3344 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3345 /*@ 3346 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3347 (the default parallel PETSc format). 3348 3349 Collective on MPI_Comm 3350 3351 Input Parameters: 3352 + B - the matrix 3353 . i - the indices into j for the start of each local row (starts with zero) 3354 . j - the column indices for each local row (starts with zero) 3355 - v - optional values in the matrix 3356 3357 Level: developer 3358 3359 Notes: 3360 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3361 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3362 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3363 3364 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3365 3366 The format which is used for the sparse matrix input, is equivalent to a 3367 row-major ordering.. i.e for the following matrix, the input data expected is 3368 as shown: 3369 3370 1 0 0 3371 2 0 3 P0 3372 ------- 3373 4 5 6 P1 3374 3375 Process0 [P0]: rows_owned=[0,1] 3376 i = {0,1,3} [size = nrow+1 = 2+1] 3377 j = {0,0,2} [size = nz = 6] 3378 v = {1,2,3} [size = nz = 6] 3379 3380 Process1 [P1]: rows_owned=[2] 3381 i = {0,3} [size = nrow+1 = 1+1] 3382 j = {0,1,2} [size = nz = 6] 3383 v = {4,5,6} [size = nz = 6] 3384 3385 .keywords: matrix, aij, compressed row, sparse, parallel 3386 3387 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3388 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3389 @*/ 3390 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3391 { 3392 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3393 3394 PetscFunctionBegin; 3395 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3396 if (f) { 3397 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3398 } 3399 PetscFunctionReturn(0); 3400 } 3401 3402 #undef __FUNCT__ 3403 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3404 /*@C 3405 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3406 (the default parallel PETSc format). For good matrix assembly performance 3407 the user should preallocate the matrix storage by setting the parameters 3408 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3409 performance can be increased by more than a factor of 50. 3410 3411 Collective on MPI_Comm 3412 3413 Input Parameters: 3414 + A - the matrix 3415 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3416 (same value is used for all local rows) 3417 . d_nnz - array containing the number of nonzeros in the various rows of the 3418 DIAGONAL portion of the local submatrix (possibly different for each row) 3419 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3420 The size of this array is equal to the number of local rows, i.e 'm'. 
3421 You must leave room for the diagonal entry even if it is zero. 3422 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3423 submatrix (same value is used for all local rows). 3424 - o_nnz - array containing the number of nonzeros in the various rows of the 3425 OFF-DIAGONAL portion of the local submatrix (possibly different for 3426 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3427 structure. The size of this array is equal to the number 3428 of local rows, i.e 'm'. 3429 3430 If the *_nnz parameter is given then the *_nz parameter is ignored 3431 3432 The AIJ format (also called the Yale sparse matrix format or 3433 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3434 storage. The stored row and column indices begin with zero. 3435 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3436 3437 The parallel matrix is partitioned such that the first m0 rows belong to 3438 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3439 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3440 3441 The DIAGONAL portion of the local submatrix of a processor can be defined 3442 as the submatrix which is obtained by extraction the part corresponding to 3443 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3444 first row that belongs to the processor, r2 is the last row belonging to 3445 the this processor, and c1-c2 is range of indices of the local part of a 3446 vector suitable for applying the matrix to. This is an mxn matrix. In the 3447 common case of a square matrix, the row and column ranges are the same and 3448 the DIAGONAL part is also square. The remaining portion of the local 3449 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3450 3451 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3452 3453 You can call MatGetInfo() to get information on how effective the preallocation was; 3454 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3455 You can also run with the option -info and look for messages with the string 3456 malloc in them to see if additional memory allocation was needed. 3457 3458 Example usage: 3459 3460 Consider the following 8x8 matrix with 34 non-zero values, that is 3461 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3462 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3463 as follows: 3464 3465 .vb 3466 1 2 0 | 0 3 0 | 0 4 3467 Proc0 0 5 6 | 7 0 0 | 8 0 3468 9 0 10 | 11 0 0 | 12 0 3469 ------------------------------------- 3470 13 0 14 | 15 16 17 | 0 0 3471 Proc1 0 18 0 | 19 20 21 | 0 0 3472 0 0 0 | 22 23 0 | 24 0 3473 ------------------------------------- 3474 Proc2 25 26 27 | 0 0 28 | 29 0 3475 30 0 0 | 31 32 33 | 0 34 3476 .ve 3477 3478 This can be represented as a collection of submatrices as: 3479 3480 .vb 3481 A B C 3482 D E F 3483 G H I 3484 .ve 3485 3486 Where the submatrices A,B,C are owned by proc0, D,E,F are 3487 owned by proc1, G,H,I are owned by proc2. 3488 3489 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3490 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3491 The 'M','N' parameters are 8,8, and have the same values on all procs. 3492 3493 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3494 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3495 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3496 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3497 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3498 matrix, and [DF] as another SeqAIJ matrix. 3499 3500 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3501 allocated for every row of the local DIAGONAL submatrix, and o_nz 3502 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3503 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 3504 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 3505 In this case, the values of d_nz,o_nz are: 3506 .vb 3507 proc0 : d_nz = 2, o_nz = 2 3508 proc1 : d_nz = 3, o_nz = 2 3509 proc2 : d_nz = 1, o_nz = 4 3510 .ve 3511 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3512 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3513 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3514 34 values. 3515 3516 When the d_nnz, o_nnz parameters are specified, the storage is specified 3517 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3518 In the above case the values for d_nnz,o_nnz are: 3519 .vb 3520 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3521 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3522 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3523 .ve 3524 Here the space allocated is the sum of all the above values, i.e., 34, and 3525 hence the preallocation is perfect. 3526 3527 Level: intermediate 3528 3529 .keywords: matrix, aij, compressed row, sparse, parallel 3530 3531 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3532 MPIAIJ, MatGetInfo() 3533 @*/ 3534 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3535 { 3536 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3537 3538 PetscFunctionBegin; 3539 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3540 if (f) { 3541 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3542 } 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3548 /*@ 3549 MatCreateMPIAIJWithArrays - Creates an MPIAIJ matrix using arrays that contain the local rows 3550 in standard CSR format. 3551 3552 Collective on MPI_Comm 3553 3554 Input Parameters: 3555 + comm - MPI communicator 3556 . m - number of local rows (cannot be PETSC_DECIDE) 3557 . n - This value should be the same as the local size used in creating the 3558 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 3559 calculated if N is given). For square matrices n is almost always m. 3560 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3561 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3562 . i - row indices; i[k] gives the start of local row k in j and a (i[0] must be 0) 3563 . j - column indices 3564 - a - matrix values 3565 3566 Output Parameter: 3567 . mat - the matrix 3568 3569 Level: intermediate 3570 3571 Notes: 3572 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3573 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3574 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3575 3576 The i and j indices are 0-based, and the i indices refer to positions in the local j array.
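   For illustration, a minimal sketch of the call made on process 0 of the two-process example
   described below (the sizes and values are simply those of that illustration):
.vb
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
   Process 1 makes the analogous call with its single local row.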
3577 3578 The format which is used for the sparse matrix input, is equivalent to a 3579 row-major ordering.. i.e for the following matrix, the input data expected is 3580 as shown: 3581 3582 1 0 0 3583 2 0 3 P0 3584 ------- 3585 4 5 6 P1 3586 3587 Process0 [P0]: rows_owned=[0,1] 3588 i = {0,1,3} [size = nrow+1 = 2+1] 3589 j = {0,0,2} [size = nz = 6] 3590 v = {1,2,3} [size = nz = 6] 3591 3592 Process1 [P1]: rows_owned=[2] 3593 i = {0,3} [size = nrow+1 = 1+1] 3594 j = {0,1,2} [size = nz = 6] 3595 v = {4,5,6} [size = nz = 6] 3596 3597 .keywords: matrix, aij, compressed row, sparse, parallel 3598 3599 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3600 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3601 @*/ 3602 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3603 { 3604 PetscErrorCode ierr; 3605 3606 PetscFunctionBegin; 3607 if (i[0]) { 3608 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3609 } 3610 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3611 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3612 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3613 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3614 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3615 PetscFunctionReturn(0); 3616 } 3617 3618 #undef __FUNCT__ 3619 #define __FUNCT__ "MatCreateMPIAIJ" 3620 /*@C 3621 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3622 (the default parallel PETSc format). For good matrix assembly performance 3623 the user should preallocate the matrix storage by setting the parameters 3624 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3625 performance can be increased by more than a factor of 50. 3626 3627 Collective on MPI_Comm 3628 3629 Input Parameters: 3630 + comm - MPI communicator 3631 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3632 This value should be the same as the local size used in creating the 3633 y vector for the matrix-vector product y = Ax. 3634 . n - This value should be the same as the local size used in creating the 3635 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3636 calculated if N is given) For square matrices n is almost always m. 3637 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3638 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3639 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3640 (same value is used for all local rows) 3641 . d_nnz - array containing the number of nonzeros in the various rows of the 3642 DIAGONAL portion of the local submatrix (possibly different for each row) 3643 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3644 The size of this array is equal to the number of local rows, i.e 'm'. 3645 You must leave room for the diagonal entry even if it is zero. 3646 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3647 submatrix (same value is used for all local rows). 
3648 - o_nnz - array containing the number of nonzeros in the various rows of the 3649 OFF-DIAGONAL portion of the local submatrix (possibly different for 3650 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3651 structure. The size of this array is equal to the number 3652 of local rows, i.e., 'm'. 3653 3654 Output Parameter: 3655 . A - the matrix 3656 3657 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3658 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3659 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3660 3661 Notes: 3662 If the *_nnz parameter is given then the *_nz parameter is ignored. 3663 3664 The m,n,M,N parameters specify the size of the matrix and its partitioning across 3665 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3666 storage requirements for this matrix. 3667 3668 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3669 processor then it must be used on all processors that share the object for 3670 that argument. 3671 3672 The user MUST specify either the local or global matrix dimensions 3673 (possibly both). 3674 3675 The parallel matrix is partitioned across processors such that the 3676 first m0 rows belong to process 0, the next m1 rows belong to 3677 process 1, the next m2 rows belong to process 2, etc., where 3678 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 3679 values corresponding to an [m x N] submatrix. 3680 3681 The columns are logically partitioned with the n0 columns belonging 3682 to the 0th partition, the next n1 columns belonging to the next 3683 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 3684 3685 The DIAGONAL portion of the local submatrix on any given processor 3686 is the submatrix formed by that processor's m rows and its n 3687 columns, i.e., the diagonal block on 3688 process 0 is [m0 x n0], the diagonal block on process 1 is [m1 x n1], 3689 etc. The remaining portion of the local submatrix, of size [m x (N-n)], 3690 constitutes the OFF-DIAGONAL portion. The example below better 3691 illustrates this concept. 3692 3693 For a square global matrix we define each processor's diagonal portion 3694 to be its local rows and the corresponding columns (a square submatrix); 3695 each processor's off-diagonal portion encompasses the remainder of the 3696 local matrix (a rectangular submatrix). 3697 3698 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3699 3700 When calling this routine with a single process communicator, a matrix of 3701 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3702 type of communicator, use the construction mechanism: 3703 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3704 3705 By default, this format uses inodes (identical nodes) when possible. 3706 We search for consecutive rows with the same nonzero structure, thereby 3707 reusing matrix information to achieve increased efficiency. 3708 3709 Options Database Keys: 3710 + -mat_no_inode - Do not use inodes 3711 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3712 - -mat_aij_oneindex - Internally use indexing starting at 1 3713 rather than 0. Note that when calling MatSetValues(), 3714 the user still MUST index entries starting at 0! 3715 3716 3717 Example usage: 3718 3719 Consider the following 8x8 matrix with 34 non-zero values, that is 3720 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 3721 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 3722 as follows: 3723 3724 .vb 3725 1 2 0 | 0 3 0 | 0 4 3726 Proc0 0 5 6 | 7 0 0 | 8 0 3727 9 0 10 | 11 0 0 | 12 0 3728 ------------------------------------- 3729 13 0 14 | 15 16 17 | 0 0 3730 Proc1 0 18 0 | 19 20 21 | 0 0 3731 0 0 0 | 22 23 0 | 24 0 3732 ------------------------------------- 3733 Proc2 25 26 27 | 0 0 28 | 29 0 3734 30 0 0 | 31 32 33 | 0 34 3735 .ve 3736 3737 This can be represented as a collection of submatrices as: 3738 3739 .vb 3740 A B C 3741 D E F 3742 G H I 3743 .ve 3744 3745 Where the submatrices A,B,C are owned by proc0, D,E,F are 3746 owned by proc1, and G,H,I are owned by proc2. 3747 3748 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3749 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3750 The 'M','N' parameters are 8,8, and have the same values on all procs. 3751 3752 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3753 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3754 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3755 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3756 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3757 matrix, and [DF] as another SeqAIJ matrix. 3758 3759 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3760 allocated for every row of the local DIAGONAL submatrix, and o_nz 3761 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3762 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 3763 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 3764 In this case, the values of d_nz,o_nz are: 3765 .vb 3766 proc0 : d_nz = 2, o_nz = 2 3767 proc1 : d_nz = 3, o_nz = 2 3768 proc2 : d_nz = 1, o_nz = 4 3769 .ve 3770 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3771 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3772 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3773 34 values. 3774 3775 When the d_nnz, o_nnz parameters are specified, the storage is specified 3776 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3777 In the above case the values for d_nnz,o_nnz are: 3778 .vb 3779 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3780 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3781 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3782 .ve 3783 Here the space allocated is the sum of all the above values, i.e., 34, and 3784 hence the preallocation is perfect.
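   As a sketch of the recommended construction sequence mentioned above (variable names are
   illustrative; d_nnz and o_nnz are the per-row counts worked out in this example):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      /* insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
.ve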
3785 3786 Level: intermediate 3787 3788 .keywords: matrix, aij, compressed row, sparse, parallel 3789 3790 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3791 MPIAIJ, MatCreateMPIAIJWithArrays() 3792 @*/ 3793 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3794 { 3795 PetscErrorCode ierr; 3796 PetscMPIInt size; 3797 3798 PetscFunctionBegin; 3799 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3800 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3801 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3802 if (size > 1) { 3803 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3804 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3805 } else { 3806 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3807 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3808 } 3809 PetscFunctionReturn(0); 3810 } 3811 3812 #undef __FUNCT__ 3813 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3814 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3815 { 3816 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3817 3818 PetscFunctionBegin; 3819 *Ad = a->A; 3820 *Ao = a->B; 3821 *colmap = a->garray; 3822 PetscFunctionReturn(0); 3823 } 3824 3825 #undef __FUNCT__ 3826 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3827 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3828 { 3829 PetscErrorCode ierr; 3830 PetscInt i; 3831 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3832 3833 PetscFunctionBegin; 3834 if (coloring->ctype == IS_COLORING_GLOBAL) { 3835 ISColoringValue *allcolors,*colors; 3836 ISColoring ocoloring; 3837 3838 /* set coloring for diagonal portion */ 3839 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3840 3841 /* set coloring for off-diagonal portion */ 3842 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3843 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3844 for (i=0; i<a->B->cmap->n; i++) { 3845 colors[i] = allcolors[a->garray[i]]; 3846 } 3847 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3848 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3849 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3850 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3851 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3852 ISColoringValue *colors; 3853 PetscInt *larray; 3854 ISColoring ocoloring; 3855 3856 /* set coloring for diagonal portion */ 3857 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3858 for (i=0; i<a->A->cmap->n; i++) { 3859 larray[i] = i + A->cmap->rstart; 3860 } 3861 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3862 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3863 for (i=0; i<a->A->cmap->n; i++) { 3864 colors[i] = coloring->colors[larray[i]]; 3865 } 3866 ierr = PetscFree(larray);CHKERRQ(ierr); 3867 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3868 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3869 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3870 3871 /* set coloring for off-diagonal portion */ 3872 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3873 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3874 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3875 for (i=0; i<a->B->cmap->n; i++) { 3876 colors[i] = coloring->colors[larray[i]]; 3877 } 3878 ierr = PetscFree(larray);CHKERRQ(ierr); 3879 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3880 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3881 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3882 } else { 3883 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3884 } 3885 3886 PetscFunctionReturn(0); 3887 } 3888 3889 #if defined(PETSC_HAVE_ADIC) 3890 #undef __FUNCT__ 3891 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3892 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3893 { 3894 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3895 PetscErrorCode ierr; 3896 3897 PetscFunctionBegin; 3898 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3899 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3900 PetscFunctionReturn(0); 3901 } 3902 #endif 3903 3904 #undef __FUNCT__ 3905 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3906 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3907 { 3908 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3909 PetscErrorCode ierr; 3910 3911 PetscFunctionBegin; 3912 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3913 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3914 PetscFunctionReturn(0); 3915 } 3916 3917 #undef __FUNCT__ 3918 #define __FUNCT__ "MatMerge" 3919 /*@ 3920 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3921 matrices from each processor 3922 3923 Collective on MPI_Comm 3924 3925 Input Parameters: 3926 + comm - the communicators the parallel matrix will live on 3927 . inmat - the input sequential matrices 3928 . n - number of local columns (or PETSC_DECIDE) 3929 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3930 3931 Output Parameter: 3932 . outmat - the parallel matrix generated 3933 3934 Level: advanced 3935 3936 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
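   The input matrix inmat is destroyed by this routine (the implementation calls MatDestroy() on it),
   so the caller must not use or destroy it afterwards.

   A minimal calling sketch (names are illustrative), assuming each process has built a SeqAIJ
   matrix seqA with the same number of columns:
.vb
      Mat A;
      ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve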
3937 3938 @*/ 3939 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3940 { 3941 PetscErrorCode ierr; 3942 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3943 PetscInt *indx; 3944 PetscScalar *values; 3945 3946 PetscFunctionBegin; 3947 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3948 if (scall == MAT_INITIAL_MATRIX){ 3949 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3950 if (n == PETSC_DECIDE){ 3951 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3952 } 3953 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3954 rstart -= m; 3955 3956 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3957 for (i=0;i<m;i++) { 3958 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3959 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3960 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3961 } 3962 /* This routine will ONLY return MPIAIJ type matrix */ 3963 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3964 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3965 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3966 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3967 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3968 3969 } else if (scall == MAT_REUSE_MATRIX){ 3970 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3971 } else { 3972 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3973 } 3974 3975 for (i=0;i<m;i++) { 3976 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3977 Ii = i + rstart; 3978 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3979 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3980 } 3981 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3982 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3983 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3984 3985 PetscFunctionReturn(0); 3986 } 3987 3988 #undef __FUNCT__ 3989 #define __FUNCT__ "MatFileSplit" 3990 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3991 { 3992 PetscErrorCode ierr; 3993 PetscMPIInt rank; 3994 PetscInt m,N,i,rstart,nnz; 3995 size_t len; 3996 const PetscInt *indx; 3997 PetscViewer out; 3998 char *name; 3999 Mat B; 4000 const PetscScalar *values; 4001 4002 PetscFunctionBegin; 4003 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4004 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4005 /* Should this be the type of the diagonal block of A? 
*/ 4006 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4007 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4008 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4009 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4010 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4011 for (i=0;i<m;i++) { 4012 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4013 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4014 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4015 } 4016 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4017 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4018 4019 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4020 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4021 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4022 sprintf(name,"%s.%d",outfile,rank); 4023 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4024 ierr = PetscFree(name); 4025 ierr = MatView(B,out);CHKERRQ(ierr); 4026 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4027 ierr = MatDestroy(B);CHKERRQ(ierr); 4028 PetscFunctionReturn(0); 4029 } 4030 4031 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4032 #undef __FUNCT__ 4033 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4034 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4035 { 4036 PetscErrorCode ierr; 4037 Mat_Merge_SeqsToMPI *merge; 4038 PetscContainer container; 4039 4040 PetscFunctionBegin; 4041 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4042 if (container) { 4043 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4044 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4045 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4046 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4047 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4048 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4049 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4050 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4051 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4052 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4053 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4054 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4055 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4056 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4057 4058 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4059 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4060 } 4061 ierr = PetscFree(merge);CHKERRQ(ierr); 4062 4063 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4064 PetscFunctionReturn(0); 4065 } 4066 4067 #include "../src/mat/utils/freespace.h" 4068 #include "petscbt.h" 4069 4070 #undef __FUNCT__ 4071 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4072 /*@C 4073 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4074 matrices from each processor 4075 4076 Collective on MPI_Comm 4077 4078 Input Parameters: 4079 + comm - the communicators the parallel matrix will live on 4080 . seqmat - the input sequential matrices 4081 . m - number of local rows (or PETSC_DECIDE) 4082 . n - number of local columns (or PETSC_DECIDE) 4083 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4084 4085 Output Parameter: 4086 . mpimat - the parallel matrix generated 4087 4088 Level: advanced 4089 4090 Notes: 4091 The dimensions of the sequential matrix in each processor MUST be the same. 
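   This routine only fills in the numerical values of mpimat; the parallel matrix itself must first be
   created with MatMerge_SeqsToMPISymbolic(), or both phases can be run through MatMerge_SeqsToMPI().
   A sketch of the split usage (arguments as described above):
.vb
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve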
4092 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4093 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4094 @*/ 4095 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4096 { 4097 PetscErrorCode ierr; 4098 MPI_Comm comm=((PetscObject)mpimat)->comm; 4099 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4100 PetscMPIInt size,rank,taga,*len_s; 4101 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4102 PetscInt proc,m; 4103 PetscInt **buf_ri,**buf_rj; 4104 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4105 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4106 MPI_Request *s_waits,*r_waits; 4107 MPI_Status *status; 4108 MatScalar *aa=a->a; 4109 MatScalar **abuf_r,*ba_i; 4110 Mat_Merge_SeqsToMPI *merge; 4111 PetscContainer container; 4112 4113 PetscFunctionBegin; 4114 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4115 4116 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4117 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4118 4119 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4120 if (container) { 4121 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4122 } 4123 bi = merge->bi; 4124 bj = merge->bj; 4125 buf_ri = merge->buf_ri; 4126 buf_rj = merge->buf_rj; 4127 4128 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4129 owners = merge->rowmap->range; 4130 len_s = merge->len_s; 4131 4132 /* send and recv matrix values */ 4133 /*-----------------------------*/ 4134 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4135 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4136 4137 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4138 for (proc=0,k=0; proc<size; proc++){ 4139 if (!len_s[proc]) continue; 4140 i = owners[proc]; 4141 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4142 k++; 4143 } 4144 4145 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4146 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4147 ierr = PetscFree(status);CHKERRQ(ierr); 4148 4149 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4150 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4151 4152 /* insert mat values of mpimat */ 4153 /*----------------------------*/ 4154 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4155 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4156 4157 for (k=0; k<merge->nrecv; k++){ 4158 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4159 nrows = *(buf_ri_k[k]); 4160 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4161 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4162 } 4163 4164 /* set values of ba */ 4165 m = merge->rowmap->n; 4166 for (i=0; i<m; i++) { 4167 arow = owners[rank] + i; 4168 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4169 bnzi = bi[i+1] - bi[i]; 4170 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4171 4172 /* add local non-zero vals of this proc's seqmat into ba */ 4173 anzi = ai[arow+1] - ai[arow]; 4174 aj = a->j + ai[arow]; 4175 aa = a->a + ai[arow]; 4176 nextaj = 0; 4177 for (j=0; nextaj<anzi; j++){ 4178 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4179 ba_i[j] += aa[nextaj++]; 4180 } 4181 } 4182 4183 /* add received vals into ba */ 4184 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4185 /* i-th row */ 4186 if (i == *nextrow[k]) { 4187 anzi = *(nextai[k]+1) - *nextai[k]; 4188 aj = buf_rj[k] + *(nextai[k]); 4189 aa = abuf_r[k] + *(nextai[k]); 4190 nextaj = 0; 4191 for (j=0; nextaj<anzi; j++){ 4192 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4193 ba_i[j] += aa[nextaj++]; 4194 } 4195 } 4196 nextrow[k]++; nextai[k]++; 4197 } 4198 } 4199 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4200 } 4201 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4202 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4203 4204 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4205 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4206 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4207 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4208 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4209 PetscFunctionReturn(0); 4210 } 4211 4212 #undef __FUNCT__ 4213 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4214 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4215 { 4216 PetscErrorCode ierr; 4217 Mat B_mpi; 4218 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4219 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4220 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4221 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4222 PetscInt len,proc,*dnz,*onz; 4223 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4224 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4225 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4226 MPI_Status *status; 4227 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4228 PetscBT lnkbt; 4229 Mat_Merge_SeqsToMPI *merge; 4230 PetscContainer container; 4231 4232 PetscFunctionBegin; 4233 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4234 4235 /* make sure it is a PETSc comm */ 4236 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4237 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4239 4240 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4241 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4242 4243 /* determine row ownership */ 4244 /*---------------------------------------------------------*/ 4245 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4246 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4247 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4248 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4249 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4250 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4251 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4252 4253 m = merge->rowmap->n; 4254 M = merge->rowmap->N; 4255 owners = merge->rowmap->range; 4256 4257 /* determine the number of messages to send, their lengths */ 4258 /*---------------------------------------------------------*/ 4259 len_s = merge->len_s; 4260 4261 len = 0; /* length of buf_si[] */ 4262 merge->nsend = 0; 4263 for (proc=0; proc<size; proc++){ 4264 len_si[proc] = 0; 4265 if (proc == rank){ 4266 len_s[proc] = 0; 4267 } else { 4268 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4269 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4270 } 4271 if (len_s[proc]) { 4272 merge->nsend++; 4273 nrows = 0; 4274 for (i=owners[proc]; i<owners[proc+1]; i++){ 4275 if (ai[i+1] > ai[i]) nrows++; 4276 } 4277 len_si[proc] = 2*(nrows+1); 4278 len += len_si[proc]; 4279 } 4280 } 4281 4282 /* determine the number and length of messages to receive for ij-structure */ 4283 /*-------------------------------------------------------------------------*/ 4284 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4285 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4286 4287 /* post the Irecv of j-structure */ 4288 /*-------------------------------*/ 4289 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4290 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4291 4292 /* post the Isend of j-structure */ 4293 /*--------------------------------*/ 4294 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4295 4296 for (proc=0, k=0; proc<size; proc++){ 4297 if (!len_s[proc]) continue; 4298 i = owners[proc]; 4299 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4300 k++; 4301 } 4302 4303 /* receives and sends of j-structure are complete */ 4304 /*------------------------------------------------*/ 4305 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4306 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4307 4308 /* send and recv i-structure */ 4309 /*---------------------------*/ 4310 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4311 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4312 4313 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4314 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4315 for (proc=0,k=0; proc<size; proc++){ 4316 if (!len_s[proc]) continue; 4317 /* form outgoing message for i-structure: 4318 buf_si[0]: nrows to be sent 4319 [1:nrows]: row index (global) 4320 [nrows+1:2*nrows+1]: i-structure index 4321 */ 4322 /*-------------------------------------------*/ 4323 nrows = len_si[proc]/2 - 1; 4324 buf_si_i = buf_si + nrows+1; 4325 buf_si[0] = nrows; 4326 buf_si_i[0] = 0; 4327 nrows = 0; 4328 for (i=owners[proc]; i<owners[proc+1]; i++){ 4329 anzi = ai[i+1] - ai[i]; 4330 if (anzi) { 4331 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4332 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4333 nrows++; 4334 } 4335 } 4336 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4337 k++; 4338 buf_si += len_si[proc]; 4339 } 4340 4341 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4342 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4343 4344 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4345 for (i=0; i<merge->nrecv; i++){ 4346 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4347 } 4348 4349 ierr = PetscFree(len_si);CHKERRQ(ierr); 4350 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4351 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4352 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4353 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4354 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4355 ierr = PetscFree(status);CHKERRQ(ierr); 4356 4357 /* compute a local seq matrix in each processor */ 4358 /*----------------------------------------------*/ 4359 /* allocate bi array and free space for accumulating nonzero column info */ 4360 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4361 bi[0] = 0; 4362 4363 /* create and initialize a linked list */ 4364 nlnk = N+1; 4365 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4366 4367 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4368 len = 0; 4369 len = ai[owners[rank+1]] - ai[owners[rank]]; 4370 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4371 current_space = free_space; 4372 4373 /* determine symbolic info for each local row */ 4374 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4375 4376 for (k=0; k<merge->nrecv; k++){ 4377 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4378 nrows = *buf_ri_k[k]; 4379 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4380 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4381 } 4382 4383 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4384 len = 0; 4385 for (i=0;i<m;i++) { 4386 bnzi = 0; 4387 /* add local non-zero cols of this proc's seqmat into lnk */ 4388 arow = owners[rank] + i; 4389 anzi = ai[arow+1] - ai[arow]; 4390 aj = a->j + ai[arow]; 4391 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4392 bnzi += nlnk; 4393 /* add received col data into lnk */ 4394 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4395 if (i == *nextrow[k]) { /* i-th row */ 4396 anzi = *(nextai[k]+1) - *nextai[k]; 4397 aj = buf_rj[k] + *nextai[k]; 4398 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4399 bnzi += nlnk; 4400 nextrow[k]++; nextai[k]++; 4401 } 4402 } 4403 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4404 4405 /* if free space is not available, make more free space */ 4406 if (current_space->local_remaining<bnzi) { 4407 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4408 nspacedouble++; 4409 } 4410 /* copy data into free space, then initialize lnk */ 4411 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4412 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4413 4414 current_space->array += bnzi; 4415 current_space->local_used += bnzi; 4416 current_space->local_remaining -= bnzi; 4417 4418 bi[i+1] = bi[i] + bnzi; 4419 } 4420 4421 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4422 4423 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4424 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4425 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4426 4427 /* create symbolic parallel matrix B_mpi */ 4428 /*---------------------------------------*/ 4429 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4430 if (n==PETSC_DECIDE) { 4431 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4432 } else { 4433 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4434 } 4435 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4436 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4437 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4438 4439 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4440 B_mpi->assembled = PETSC_FALSE; 4441 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4442 merge->bi = bi; 4443 merge->bj = bj; 4444 merge->buf_ri = buf_ri; 4445 merge->buf_rj = buf_rj; 4446 merge->coi = PETSC_NULL; 4447 merge->coj = PETSC_NULL; 4448 merge->owners_co = PETSC_NULL; 4449 4450 /* attach the supporting struct to B_mpi for reuse */ 4451 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4452 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4453 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4454 *mpimat = B_mpi; 4455 4456 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4457 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4458 PetscFunctionReturn(0); 4459 } 4460 4461 #undef __FUNCT__ 4462 #define __FUNCT__ "MatMerge_SeqsToMPI" 4463 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4464 { 4465 PetscErrorCode ierr; 4466 4467 PetscFunctionBegin; 4468 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4469 if (scall == MAT_INITIAL_MATRIX){ 4470 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4471 } 4472 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4473 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "MatGetLocalMat" 4479 /*@ 4480 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4481 4482 Not Collective 4483 4484 Input Parameters: 4485 + A - the matrix 4486 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4487 4488 Output Parameter: 4489 . 
A_loc - the local sequential matrix generated 4490 4491 Level: developer 4492 4493 @*/ 4494 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4495 { 4496 PetscErrorCode ierr; 4497 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4498 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4499 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4500 MatScalar *aa=a->a,*ba=b->a,*cam; 4501 PetscScalar *ca; 4502 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4503 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4504 4505 PetscFunctionBegin; 4506 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4507 if (scall == MAT_INITIAL_MATRIX){ 4508 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4509 ci[0] = 0; 4510 for (i=0; i<am; i++){ 4511 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4512 } 4513 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4514 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4515 k = 0; 4516 for (i=0; i<am; i++) { 4517 ncols_o = bi[i+1] - bi[i]; 4518 ncols_d = ai[i+1] - ai[i]; 4519 /* off-diagonal portion of A */ 4520 for (jo=0; jo<ncols_o; jo++) { 4521 col = cmap[*bj]; 4522 if (col >= cstart) break; 4523 cj[k] = col; bj++; 4524 ca[k++] = *ba++; 4525 } 4526 /* diagonal portion of A */ 4527 for (j=0; j<ncols_d; j++) { 4528 cj[k] = cstart + *aj++; 4529 ca[k++] = *aa++; 4530 } 4531 /* off-diagonal portion of A */ 4532 for (j=jo; j<ncols_o; j++) { 4533 cj[k] = cmap[*bj++]; 4534 ca[k++] = *ba++; 4535 } 4536 } 4537 /* put together the new matrix */ 4538 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4539 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4540 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4541 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4542 mat->free_a = PETSC_TRUE; 4543 mat->free_ij = PETSC_TRUE; 4544 mat->nonew = 0; 4545 } else if (scall == MAT_REUSE_MATRIX){ 4546 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4547 ci = mat->i; cj = mat->j; cam = mat->a; 4548 for (i=0; i<am; i++) { 4549 /* off-diagonal portion of A */ 4550 ncols_o = bi[i+1] - bi[i]; 4551 for (jo=0; jo<ncols_o; jo++) { 4552 col = cmap[*bj]; 4553 if (col >= cstart) break; 4554 *cam++ = *ba++; bj++; 4555 } 4556 /* diagonal portion of A */ 4557 ncols_d = ai[i+1] - ai[i]; 4558 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4559 /* off-diagonal portion of A */ 4560 for (j=jo; j<ncols_o; j++) { 4561 *cam++ = *ba++; bj++; 4562 } 4563 } 4564 } else { 4565 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4566 } 4567 4568 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4569 PetscFunctionReturn(0); 4570 } 4571 4572 #undef __FUNCT__ 4573 #define __FUNCT__ "MatGetLocalMatCondensed" 4574 /*@C 4575 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4576 4577 Not Collective 4578 4579 Input Parameters: 4580 + A - the matrix 4581 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4582 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4583 4584 Output Parameter: 4585 . 
A_loc - the local sequential matrix generated 4586 4587 Level: developer 4588 4589 @*/ 4590 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4591 { 4592 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4593 PetscErrorCode ierr; 4594 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4595 IS isrowa,iscola; 4596 Mat *aloc; 4597 4598 PetscFunctionBegin; 4599 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4600 if (!row){ 4601 start = A->rmap->rstart; end = A->rmap->rend; 4602 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4603 } else { 4604 isrowa = *row; 4605 } 4606 if (!col){ 4607 start = A->cmap->rstart; 4608 cmap = a->garray; 4609 nzA = a->A->cmap->n; 4610 nzB = a->B->cmap->n; 4611 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4612 ncols = 0; 4613 for (i=0; i<nzB; i++) { 4614 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4615 else break; 4616 } 4617 imark = i; 4618 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4619 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4620 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4621 ierr = PetscFree(idx);CHKERRQ(ierr); 4622 } else { 4623 iscola = *col; 4624 } 4625 if (scall != MAT_INITIAL_MATRIX){ 4626 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4627 aloc[0] = *A_loc; 4628 } 4629 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4630 *A_loc = aloc[0]; 4631 ierr = PetscFree(aloc);CHKERRQ(ierr); 4632 if (!row){ 4633 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4634 } 4635 if (!col){ 4636 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4637 } 4638 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4639 PetscFunctionReturn(0); 4640 } 4641 4642 #undef __FUNCT__ 4643 #define __FUNCT__ "MatGetBrowsOfAcols" 4644 /*@C 4645 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4646 4647 Collective on Mat 4648 4649 Input Parameters: 4650 + A,B - the matrices in mpiaij format 4651 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4652 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4653 4654 Output Parameter: 4655 + rowb, colb - index sets of rows and columns of B to extract 4656 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4657 - B_seq - the sequential matrix generated 4658 4659 Level: developer 4660 4661 @*/ 4662 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4663 { 4664 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4665 PetscErrorCode ierr; 4666 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4667 IS isrowb,iscolb; 4668 Mat *bseq; 4669 4670 PetscFunctionBegin; 4671 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4672 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4673 } 4674 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4675 4676 if (scall == MAT_INITIAL_MATRIX){ 4677 start = A->cmap->rstart; 4678 cmap = a->garray; 4679 nzA = a->A->cmap->n; 4680 nzB = a->B->cmap->n; 4681 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4682 ncols = 0; 4683 for (i=0; i<nzB; i++) { /* row < local row index */ 4684 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4685 else break; 4686 } 4687 imark = i; 4688 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4689 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4690 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4691 ierr = PetscFree(idx);CHKERRQ(ierr); 4692 *brstart = imark; 4693 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4694 } else { 4695 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4696 isrowb = *rowb; iscolb = *colb; 4697 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4698 bseq[0] = *B_seq; 4699 } 4700 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4701 *B_seq = bseq[0]; 4702 ierr = PetscFree(bseq);CHKERRQ(ierr); 4703 if (!rowb){ 4704 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4705 } else { 4706 *rowb = isrowb; 4707 } 4708 if (!colb){ 4709 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4710 } else { 4711 *colb = iscolb; 4712 } 4713 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4714 PetscFunctionReturn(0); 4715 } 4716 4717 #undef __FUNCT__ 4718 #define __FUNCT__ "MatGetBrowsOfAoCols" 4719 /*@C 4720 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4721 of the OFF-DIAGONAL portion of local A 4722 4723 Collective on Mat 4724 4725 Input Parameters: 4726 + A,B - the matrices in mpiaij format 4727 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4728 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4729 . 
startsj_r - similar to startsj for receives 4730 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4731 4732 Output Parameter: 4733 + B_oth - the sequential matrix generated 4734 4735 Level: developer 4736 4737 @*/ 4738 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4739 { 4740 VecScatter_MPI_General *gen_to,*gen_from; 4741 PetscErrorCode ierr; 4742 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4743 Mat_SeqAIJ *b_oth; 4744 VecScatter ctx=a->Mvctx; 4745 MPI_Comm comm=((PetscObject)ctx)->comm; 4746 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4747 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4748 PetscScalar *rvalues,*svalues; 4749 MatScalar *b_otha,*bufa,*bufA; 4750 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4751 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4752 MPI_Status *sstatus,rstatus; 4753 PetscMPIInt jj; 4754 PetscInt *cols,sbs,rbs; 4755 PetscScalar *vals; 4756 4757 PetscFunctionBegin; 4758 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4759 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4760 } 4761 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4762 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4763 4764 gen_to = (VecScatter_MPI_General*)ctx->todata; 4765 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4766 rvalues = gen_from->values; /* holds the length of receiving row */ 4767 svalues = gen_to->values; /* holds the length of sending row */ 4768 nrecvs = gen_from->n; 4769 nsends = gen_to->n; 4770 4771 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4772 srow = gen_to->indices; /* local row index to be sent */ 4773 sstarts = gen_to->starts; 4774 sprocs = gen_to->procs; 4775 sstatus = gen_to->sstatus; 4776 sbs = gen_to->bs; 4777 rstarts = gen_from->starts; 4778 rprocs = gen_from->procs; 4779 rbs = gen_from->bs; 4780 4781 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4782 if (scall == MAT_INITIAL_MATRIX){ 4783 /* i-array */ 4784 /*---------*/ 4785 /* post receives */ 4786 for (i=0; i<nrecvs; i++){ 4787 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4788 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4789 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4790 } 4791 4792 /* pack the outgoing message */ 4793 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4794 sstartsj[0] = 0; rstartsj[0] = 0; 4795 len = 0; /* total length of j or a array to be sent */ 4796 k = 0; 4797 for (i=0; i<nsends; i++){ 4798 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4799 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4800 for (j=0; j<nrows; j++) { 4801 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4802 for (l=0; l<sbs; l++){ 4803 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4804 rowlen[j*sbs+l] = ncols; 4805 len += ncols; 4806 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4807 } 4808 k++; 4809 } 4810 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4811 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4812 } 4813 /* recvs and sends of i-array are completed */ 4814 i = nrecvs; 4815 while (i--) { 4816 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4817 } 4818 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4819 4820 /* allocate buffers for sending j and a arrays */ 4821 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4822 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4823 4824 /* create i-array of B_oth */ 4825 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4826 b_othi[0] = 0; 4827 len = 0; /* total length of j or a array to be received */ 4828 k = 0; 4829 for (i=0; i<nrecvs; i++){ 4830 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4831 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4832 for (j=0; j<nrows; j++) { 4833 b_othi[k+1] = b_othi[k] + rowlen[j]; 4834 len += rowlen[j]; k++; 4835 } 4836 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4837 } 4838 4839 /* allocate space for j and a arrays of B_oth */ 4840 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4841 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4842 4843 /* j-array */ 4844 /*---------*/ 4845 /* post receives of j-array */ 4846 for (i=0; i<nrecvs; i++){ 4847 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4848 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4849 } 4850 4851 /* pack the outgoing message j-array */ 4852 k = 0; 4853 for (i=0; i<nsends; i++){ 4854 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4855 bufJ = bufj+sstartsj[i]; 4856 for (j=0; j<nrows; j++) { 4857 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4858 for (ll=0; ll<sbs; ll++){ 4859 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4860 for (l=0; l<ncols; l++){ 4861 *bufJ++ = cols[l]; 4862 } 4863 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4864 } 4865 } 4866 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4867 } 4868 4869 /* recvs and sends of j-array are completed */ 4870 i = nrecvs; 4871 while (i--) { 4872 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4873 } 4874 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4875 } else if (scall == MAT_REUSE_MATRIX){ 4876 sstartsj = *startsj; 4877 rstartsj = *startsj_r; 4878 bufa = *bufa_ptr; 4879 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4880 b_otha = b_oth->a; 4881 } else { 4882 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4883 } 4884 4885 /* a-array */ 4886 /*---------*/ 4887 /* post receives of a-array */ 4888 for (i=0; i<nrecvs; i++){ 4889 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4890 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4891 } 4892 4893 /* pack the outgoing message a-array */ 4894 k = 0; 4895 for (i=0; i<nsends; i++){ 4896 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4897 bufA = bufa+sstartsj[i]; 4898 for (j=0; j<nrows; j++) { 4899 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4900 for (ll=0; ll<sbs; ll++){ 4901 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4902 for (l=0; l<ncols; l++){ 4903 *bufA++ = vals[l]; 4904 } 4905 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4906 } 4907 } 4908 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4909 } 4910 /* recvs and sends of a-array are completed */ 4911 i = nrecvs; 4912 while (i--) { 4913 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4914 } 4915 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4916 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4917 4918 if (scall == MAT_INITIAL_MATRIX){ 4919 /* put together the new matrix */ 4920 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4921 4922 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4923 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4924 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4925 b_oth->free_a = PETSC_TRUE; 4926 b_oth->free_ij = PETSC_TRUE; 4927 b_oth->nonew = 0; 4928 4929 ierr = PetscFree(bufj);CHKERRQ(ierr); 4930 if (!startsj || !bufa_ptr){ 4931 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4932 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4933 } else { 4934 *startsj = sstartsj; 4935 *startsj_r = rstartsj; 4936 *bufa_ptr = bufa; 4937 } 4938 } 4939 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4940 PetscFunctionReturn(0); 4941 } 4942 4943 #undef __FUNCT__ 4944 #define __FUNCT__ "MatGetCommunicationStructs" 4945 /*@C 4946 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4947 4948 Not Collective 4949 4950 Input Parameters: 4951 . A - The matrix in mpiaij format 4952 4953 Output Parameter: 4954 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4955 . 
colmap - A map from global column index to local index into lvec 4956 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4957 4958 Level: developer 4959 4960 @*/ 4961 #if defined (PETSC_USE_CTABLE) 4962 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4963 #else 4964 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4965 #endif 4966 { 4967 Mat_MPIAIJ *a; 4968 4969 PetscFunctionBegin; 4970 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4971 PetscValidPointer(lvec, 2); 4972 PetscValidPointer(colmap, 3); 4973 PetscValidPointer(multScatter, 4); 4974 a = (Mat_MPIAIJ *) A->data; 4975 if (lvec) *lvec = a->lvec; 4976 if (colmap) *colmap = a->colmap; 4977 if (multScatter) *multScatter = a->Mvctx; 4978 PetscFunctionReturn(0); 4979 } 4980 4981 EXTERN_C_BEGIN 4982 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 4983 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 4984 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4985 EXTERN_C_END 4986 4987 #undef __FUNCT__ 4988 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4989 /* 4990 Computes (B'*A')' since computing B*A directly is untenable 4991 4992 n p p 4993 ( ) ( ) ( ) 4994 m ( A ) * n ( B ) = m ( C ) 4995 ( ) ( ) ( ) 4996 4997 */ 4998 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4999 { 5000 PetscErrorCode ierr; 5001 Mat At,Bt,Ct; 5002 5003 PetscFunctionBegin; 5004 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5005 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5006 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5007 ierr = MatDestroy(At);CHKERRQ(ierr); 5008 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5009 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5010 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5011 PetscFunctionReturn(0); 5012 } 5013 5014 #undef __FUNCT__ 5015 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5016 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5017 { 5018 PetscErrorCode ierr; 5019 PetscInt m=A->rmap->n,n=B->cmap->n; 5020 Mat Cmat; 5021 5022 PetscFunctionBegin; 5023 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5024 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5025 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5026 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5027 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5028 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5029 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5030 *C = Cmat; 5031 PetscFunctionReturn(0); 5032 } 5033 5034 /* ----------------------------------------------------------------*/ 5035 #undef __FUNCT__ 5036 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5037 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5038 { 5039 PetscErrorCode ierr; 5040 5041 PetscFunctionBegin; 5042 if (scall == MAT_INITIAL_MATRIX){ 5043 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5044 } 5045 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5046 PetscFunctionReturn(0); 5047 } 
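
/*
   A minimal usage sketch (hypothetical, not referenced by the library) of the
   MPIDense*MPIAIJ product implemented above.  MatMatMult() reaches
   MatMatMult_MPIDense_MPIAIJ() through the "MatMatMult_mpidense_mpiaij_C"
   function composed onto MPIAIJ matrices in MatCreate_MPIAIJ() below.  A is
   assumed to be an assembled MPIDENSE matrix and B an assembled MPIAIJ matrix
   with A->cmap->n == B->rmap->n.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMultExample_MPIDense_MPIAIJ"
static PetscErrorCode MatMatMultExample_MPIDense_MPIAIJ(Mat A,Mat B)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  /* first call creates the MPIDENSE product C = A*B (symbolic plus numeric phase) */
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  /* after the values (but not the nonzero structure) of A or B change, reuse C and redo only the numeric phase */
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}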
5048 5049 EXTERN_C_BEGIN 5050 #if defined(PETSC_HAVE_MUMPS) 5051 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5052 #endif 5053 #if defined(PETSC_HAVE_PASTIX) 5054 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5055 #endif 5056 #if defined(PETSC_HAVE_SUPERLU_DIST) 5057 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5058 #endif 5059 #if defined(PETSC_HAVE_SPOOLES) 5060 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5061 #endif 5062 EXTERN_C_END 5063 5064 /*MC 5065 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5066 5067 Options Database Keys: 5068 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5069 5070 Level: beginner 5071 5072 .seealso: MatCreateMPIAIJ() 5073 M*/ 5074 5075 EXTERN_C_BEGIN 5076 #undef __FUNCT__ 5077 #define __FUNCT__ "MatCreate_MPIAIJ" 5078 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5079 { 5080 Mat_MPIAIJ *b; 5081 PetscErrorCode ierr; 5082 PetscMPIInt size; 5083 5084 PetscFunctionBegin; 5085 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5086 5087 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5088 B->data = (void*)b; 5089 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5090 B->rmap->bs = 1; 5091 B->assembled = PETSC_FALSE; 5092 B->mapping = 0; 5093 5094 B->insertmode = NOT_SET_VALUES; 5095 b->size = size; 5096 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5097 5098 /* build cache for off array entries formed */ 5099 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5100 b->donotstash = PETSC_FALSE; 5101 b->colmap = 0; 5102 b->garray = 0; 5103 b->roworiented = PETSC_TRUE; 5104 5105 /* stuff used for matrix vector multiply */ 5106 b->lvec = PETSC_NULL; 5107 b->Mvctx = PETSC_NULL; 5108 5109 /* stuff for MatGetRow() */ 5110 b->rowindices = 0; 5111 b->rowvalues = 0; 5112 b->getrowactive = PETSC_FALSE; 5113 5114 #if defined(PETSC_HAVE_SPOOLES) 5115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5116 "MatGetFactor_mpiaij_spooles", 5117 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5118 #endif 5119 #if defined(PETSC_HAVE_MUMPS) 5120 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5121 "MatGetFactor_aij_mumps", 5122 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5123 #endif 5124 #if defined(PETSC_HAVE_PASTIX) 5125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5126 "MatGetFactor_mpiaij_pastix", 5127 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5128 #endif 5129 #if defined(PETSC_HAVE_SUPERLU_DIST) 5130 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5131 "MatGetFactor_mpiaij_superlu_dist", 5132 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5133 #endif 5134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5135 "MatStoreValues_MPIAIJ", 5136 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5137 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5138 "MatRetrieveValues_MPIAIJ", 5139 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5140 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5141 "MatGetDiagonalBlock_MPIAIJ", 5142 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5143 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5144 "MatIsTranspose_MPIAIJ", 
5145 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5146 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5147 "MatMPIAIJSetPreallocation_MPIAIJ", 5148 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5149 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5150 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5151 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5152 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5153 "MatDiagonalScaleLocal_MPIAIJ", 5154 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5155 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5156 "MatConvert_MPIAIJ_MPIAIJPERM", 5157 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5158 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5159 "MatConvert_MPIAIJ_MPIAIJCRL", 5160 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5161 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5162 "MatConvert_MPIAIJ_MPISBAIJ", 5163 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5164 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5165 "MatMatMult_MPIDense_MPIAIJ", 5166 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5167 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5168 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5169 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5170 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5171 "MatMatMultNumeric_MPIDense_MPIAIJ", 5172 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5173 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5174 PetscFunctionReturn(0); 5175 } 5176 EXTERN_C_END 5177 5178 #undef __FUNCT__ 5179 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5180 /*@ 5181 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5182 and "off-diagonal" part of the matrix in CSR format. 5183 5184 Collective on MPI_Comm 5185 5186 Input Parameters: 5187 + comm - MPI communicator 5188 . m - number of local rows (Cannot be PETSC_DECIDE) 5189 . n - This value should be the same as the local size used in creating the 5190 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5191 calculated if N is given) For square matrices n is almost always m. 5192 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5193 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5194 . i - row indices for "diagonal" portion of matrix 5195 . j - column indices 5196 . a - matrix values 5197 . oi - row indices for "off-diagonal" portion of matrix 5198 . oj - column indices 5199 - oa - matrix values 5200 5201 Output Parameter: 5202 . mat - the matrix 5203 5204 Level: advanced 5205 5206 Notes: 5207 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5208 5209 The i and j indices are 0 based 5210 5211 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5212 5213 This sets local rows and cannot be used to set off-processor values. 5214 5215 You cannot later use MatSetValues() to change values in this matrix. 
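
   Example (an illustrative sketch with hypothetical values; comm, mat and ierr are assumed to be
   declared, and gcol is a global column index owned by another process): a process owning two
   rows and two diagonal columns whose local part is the block [1 2; 0 3], plus a single
   off-diagonal entry 4.0 in global column gcol, would pass

      PetscInt    i[]  = {0,2,3},  j[]  = {0,1,1};   PetscScalar a[]  = {1.0,2.0,3.0};
      PetscInt    oi[] = {0,1,1},  oj[] = {gcol};    PetscScalar oa[] = {4.0};
      ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   Here the j indices of the "diagonal" part are local to the diagonal block, while the oj indices
   of the "off-diagonal" part are global column indices.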
5216 5217 .keywords: matrix, aij, compressed row, sparse, parallel 5218 5219 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5220 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5221 @*/ 5222 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5223 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5224 { 5225 PetscErrorCode ierr; 5226 Mat_MPIAIJ *maij; 5227 5228 PetscFunctionBegin; 5229 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5230 if (i[0]) { 5231 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5232 } 5233 if (oi[0]) { 5234 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5235 } 5236 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5237 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5238 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5239 maij = (Mat_MPIAIJ*) (*mat)->data; 5240 maij->donotstash = PETSC_TRUE; 5241 (*mat)->preallocated = PETSC_TRUE; 5242 5243 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5244 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5245 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5246 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5247 5248 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5249 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5250 5251 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5252 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5253 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5254 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5255 5256 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5257 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5258 PetscFunctionReturn(0); 5259 } 5260 5261 /* 5262 Special version for direct calls from Fortran 5263 */ 5264 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5265 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5266 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5267 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5268 #endif 5269 5270 /* Change these macros so can be used in void function */ 5271 #undef CHKERRQ 5272 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5273 #undef SETERRQ2 5274 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5275 #undef SETERRQ 5276 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5277 5278 EXTERN_C_BEGIN 5279 #undef __FUNCT__ 5280 #define __FUNCT__ "matsetvaluesmpiaij_" 5281 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5282 { 5283 Mat mat = *mmat; 5284 PetscInt m = *mm, n = *mn; 5285 InsertMode addv = *maddv; 5286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5287 PetscScalar value; 5288 PetscErrorCode ierr; 5289 5290 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5291 if (mat->insertmode == NOT_SET_VALUES) { 5292 mat->insertmode = addv; 5293 } 5294 #if defined(PETSC_USE_DEBUG) 5295 else if (mat->insertmode != addv) { 5296 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5297 } 5298 #endif 5299 { 5300 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5301 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5302 PetscTruth roworiented = aij->roworiented; 5303 5304 /* Some Variables required in the macro */ 5305 Mat A = aij->A; 5306 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5307 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5308 MatScalar *aa = a->a; 5309 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5310 Mat B = aij->B; 5311 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5312 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5313 MatScalar *ba = b->a; 5314 5315 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5316 PetscInt nonew = a->nonew; 5317 MatScalar *ap1,*ap2; 5318 5319 PetscFunctionBegin; 5320 for (i=0; i<m; i++) { 5321 if (im[i] < 0) continue; 5322 #if defined(PETSC_USE_DEBUG) 5323 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5324 #endif 5325 if (im[i] >= rstart && im[i] < rend) { 5326 row = im[i] - rstart; 5327 lastcol1 = -1; 5328 rp1 = aj + ai[row]; 5329 ap1 = aa + ai[row]; 5330 rmax1 = aimax[row]; 5331 nrow1 = ailen[row]; 5332 low1 = 0; 5333 high1 = nrow1; 5334 lastcol2 = -1; 5335 rp2 = bj + bi[row]; 5336 ap2 = ba + bi[row]; 5337 rmax2 = bimax[row]; 5338 nrow2 = bilen[row]; 5339 low2 = 0; 5340 high2 = nrow2; 5341 5342 for (j=0; j<n; j++) { 5343 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5344 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5345 if (in[j] >= cstart && in[j] < cend){ 5346 col = in[j] - cstart; 5347 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5348 } else if (in[j] < 0) continue; 5349 #if defined(PETSC_USE_DEBUG) 5350 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5351 #endif 5352 else { 5353 if (mat->was_assembled) { 5354 if (!aij->colmap) { 5355 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5356 } 5357 #if defined (PETSC_USE_CTABLE) 5358 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5359 col--; 5360 #else 5361 col = aij->colmap[in[j]] - 1; 5362 #endif 5363 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5364 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5365 col = in[j]; 5366 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5367 B = aij->B; 5368 b = (Mat_SeqAIJ*)B->data; 5369 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5370 rp2 = bj + bi[row]; 5371 ap2 = ba + bi[row]; 5372 rmax2 = bimax[row]; 5373 nrow2 = bilen[row]; 5374 low2 = 0; 5375 high2 = nrow2; 5376 bm = aij->B->rmap->n; 5377 ba = b->a; 5378 } 5379 } else col = in[j]; 5380 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5381 } 5382 } 5383 } else { 5384 if (!aij->donotstash) { 5385 if (roworiented) { 5386 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5387 } else { 5388 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5389 } 5390 } 5391 } 5392 }} 5393 PetscFunctionReturnVoid(); 5394 } 5395 EXTERN_C_END 5396 5397
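
/*
   A minimal sketch (hypothetical, never called by the library) of the
   MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX calling pattern shared by
   MatGetLocalMat(), MatGetLocalMatCondensed() and MatGetBrowsOfAoCols() above.
   A is assumed to be an assembled MPIAIJ matrix whose nonzero pattern does not
   change between the two calls.
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatExample"
static PetscErrorCode MatGetLocalMatExample(Mat A)
{
  PetscErrorCode ierr;
  Mat            A_loc;

  PetscFunctionBegin;
  /* first call allocates A_loc and copies both the structure and the values of the local rows of A */
  ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
  /* ... the numerical values of A may change here, its nonzero pattern may not ... */
  /* second call keeps the structure of A_loc and only refreshes the numerical values */
  ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(A_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}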